Imputation
We generated a plot to see all the missing values in the sample.
#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:400px; overflow-x: scroll; width:100%">
library(dplyr)
library(ggplot2)
vector_variables<-
c("tipo_de_programa_2", "estado_conyugal_2", "edad_al_ing_grupos", "escolaridad_rec", "sus_principal_mod", "freq_cons_sus_prin", "compromiso_biopsicosocial", "tenencia_de_la_vivienda_mod", "num_otras_sus_mod", "numero_de_hijos_mod_rec", "motivodeegreso_mod_imp","tipo_de_plan_res")
missing.values<-
CONS_C1_df_dup_SEP_2020_women %>%
rowwise %>%
dplyr::mutate_at(.vars = vars(vector_variables),
.funs = ~ifelse(is.na(.), 1, 0)) %>%
dplyr::ungroup() %>%
dplyr::summarise_at(vars(vector_variables),~sum(.))
#t(missing.values)
miss_val_bar<-
melt(missing.values) %>%
mutate(perc=scales::percent(value/nrow(CONS_C1_df_dup_SEP_2020_women))) %>%
arrange(desc(perc))
plot_miss<-
missing.values %>%
data.table::melt() %>% #condicion_ocupacional_corr
dplyr::filter(!variable %in% c("row", "hash_key", "dias_treat_imp_sin_na", "dup")) %>%
dplyr::mutate(perc= value/sum(nrow(CONS_C1_df_dup_SEP_2020_women))) %>%
dplyr::mutate(label_text= paste0("Variable= ",variable,"<br>n= ",value,"<br>",scales::percent(round(perc,3)))) %>%
dplyr::mutate(perc=perc*100) %>%
ggplot() +
geom_bar(aes(x=factor(variable), y=perc,label= label_text), stat = 'identity') +
sjPlot::theme_sjplot()+
# scale_y_continuous(limits=c(0,1), labels=percent)+
theme(axis.text.x = element_text(angle = 90, hjust = 1, size=9))+
labs(x=NULL, y="% of Missing Values", caption=paste0("Nota. Percentage of missing values (n= ",sum(nrow(CONS_C1_df_dup_SEP_2020_women)),")"))
ggplotly(plot_miss, tooltip = c("label_text"))%>% layout(xaxis= list(showticklabels = T), height = 600, width=800) %>% layout(yaxis = list(tickformat='%', range = c(0, 5)))
#</div>
From the figure above, we could see that the Tenure status of households (tenencia_de_la_vivienda_mod) and the Biopsychosocial Involvement (compromiso_biopsicosocial) had a proportion of missing values, but no greater than 5%. This is why we imputed these values under MAR assumption.
vector_variables_only_for_imputation<-
c("row", "hash_key", "tipo_de_programa_2", "estado_conyugal_2", "edad_al_ing_grupos", "escolaridad_rec", "sus_principal_mod", "freq_cons_sus_prin", "compromiso_biopsicosocial", "tenencia_de_la_vivienda_mod", "num_otras_sus_mod", "numero_de_hijos_mod_rec","motivodeegreso_mod_imp","tipo_de_plan_res")
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#HACER BASE ESPECIAL QUE CONTENGA UNA VARIABLE DE EDAD DE INICIO DE CONSUMO DE SUSTANCIA PRINCIPAL PARA EQUIPARAR
CONS_C1_df_dup_SEP_2020_women_miss<-
CONS_C1_df_dup_SEP_2020_women %>%
#dplyr::group_by(hash_key) %>%
#dplyr::mutate(rn=row_number()) %>%
#dplyr::ungroup() %>%
#:#:#:#:#:#:#:#:#:#:#:
# ORDINALIZAR LAS VARIABLES ORDINALES:
dplyr::select_(.dots = vector_variables_only_for_imputation) %>%
data.table::data.table()
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(evaluacindelprocesoteraputico)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
library(Amelia)
amelia_fit <- amelia(CONS_C1_df_dup_SEP_2020_women_miss,
#Warning message:
#In amcheck(x = x, m = m, idvars = numopts$idvars, priors = priors, :
#The number of categories in one of the variables marked nominal has greater than 10 categories. Check nominal specification.
m=61,
parallel = "multicore",
idvars="row",
noms= c("tipo_de_programa_2", "estado_conyugal_2", "sus_principal_mod", "tenencia_de_la_vivienda_mod", "numero_de_hijos_mod_rec","motivodeegreso_mod_imp", "tipo_de_plan_res"),
ords= c("edad_al_ing_grupos", "escolaridad_rec", "freq_cons_sus_prin", "compromiso_biopsicosocial", "num_otras_sus_mod"),
cs = "hash_key",
incheck = TRUE)
# Se sacó el servicio de salud porque tiene mucha información: The number of categories in one of the variables marked nominal has greater than 10 categories. Check nominal specification.
#Error in yy %*% unique(na.omit(x.orig[, i])) : non-conformable arguments.
Age at Admission to Treatment (in groups)
We started looking over the missing values in the age at admission (in groups) (n=4).
#On this graph, a y = x line indicates the line of perfect agreement; that is, if the imputation model was a perfect predictor of the true value, all the imputations would fall on this line
no_mostrar=0
if(no_mostrar==1){
res <- {
setTimeLimit(nn_K*500)
ovr_imp_edad_ini_cons<-overimpute(amelia_fit, var = "edad_al_ing_grupos")
}
}
paste0("Users that had more than one treatment with no date of admission: ",CONS_C1_df_dup_SEP_2020_women_miss %>%
dplyr::group_by(hash_key) %>%
dplyr::mutate(na_edad_ing=sum(is.na(edad_al_ing_grupos))) %>%
dplyr::ungroup() %>%
dplyr::filter(na_edad_ing>0) %>%
dplyr::group_by(hash_key) %>%
dplyr::summarise(n=n()) %>% dplyr::filter(n>1) %>% nrow())
## [1] "Users that had more than one treatment with no date of admission: 0"
#Hay poca relación en las imputaciones.
#table(is.na(CONS_C1_df_dup_SEP_2020_women_not_miss$edad_al_ing),exclude=NULL)
edad_al_ing_grupos_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$edad_al_ing_grupos,
amelia_fit$imputations$imp2$edad_al_ing_grupos,
amelia_fit$imputations$imp3$edad_al_ing_grupos,
amelia_fit$imputations$imp4$edad_al_ing_grupos,
amelia_fit$imputations$imp5$edad_al_ing_grupos,
amelia_fit$imputations$imp6$edad_al_ing_grupos,
amelia_fit$imputations$imp7$edad_al_ing_grupos,
amelia_fit$imputations$imp8$edad_al_ing_grupos,
amelia_fit$imputations$imp9$edad_al_ing_grupos,
amelia_fit$imputations$imp10$edad_al_ing_grupos,
amelia_fit$imputations$imp11$edad_al_ing_grupos,
amelia_fit$imputations$imp12$edad_al_ing_grupos,
amelia_fit$imputations$imp13$edad_al_ing_grupos,
amelia_fit$imputations$imp14$edad_al_ing_grupos,
amelia_fit$imputations$imp15$edad_al_ing_grupos,
amelia_fit$imputations$imp16$edad_al_ing_grupos,
amelia_fit$imputations$imp17$edad_al_ing_grupos,
amelia_fit$imputations$imp18$edad_al_ing_grupos,
amelia_fit$imputations$imp19$edad_al_ing_grupos,
amelia_fit$imputations$imp20$edad_al_ing_grupos,
amelia_fit$imputations$imp21$edad_al_ing_grupos,
amelia_fit$imputations$imp22$edad_al_ing_grupos,
amelia_fit$imputations$imp23$edad_al_ing_grupos,
amelia_fit$imputations$imp24$edad_al_ing_grupos,
amelia_fit$imputations$imp25$edad_al_ing_grupos,
amelia_fit$imputations$imp26$edad_al_ing_grupos,
amelia_fit$imputations$imp27$edad_al_ing_grupos,
amelia_fit$imputations$imp28$edad_al_ing_grupos,
amelia_fit$imputations$imp29$edad_al_ing_grupos,
amelia_fit$imputations$imp30$edad_al_ing_grupos,
amelia_fit$imputations$imp31$edad_al_ing_grupos,
amelia_fit$imputations$imp32$edad_al_ing_grupos,
amelia_fit$imputations$imp33$edad_al_ing_grupos,
amelia_fit$imputations$imp34$edad_al_ing_grupos,
amelia_fit$imputations$imp35$edad_al_ing_grupos,
amelia_fit$imputations$imp36$edad_al_ing_grupos,
amelia_fit$imputations$imp37$edad_al_ing_grupos,
amelia_fit$imputations$imp38$edad_al_ing_grupos,
amelia_fit$imputations$imp39$edad_al_ing_grupos,
amelia_fit$imputations$imp40$edad_al_ing_grupos,
amelia_fit$imputations$imp41$edad_al_ing_grupos,
amelia_fit$imputations$imp42$edad_al_ing_grupos,
amelia_fit$imputations$imp43$edad_al_ing_grupos,
amelia_fit$imputations$imp44$edad_al_ing_grupos,
amelia_fit$imputations$imp45$edad_al_ing_grupos,
amelia_fit$imputations$imp46$edad_al_ing_grupos,
amelia_fit$imputations$imp47$edad_al_ing_grupos,
amelia_fit$imputations$imp48$edad_al_ing_grupos,
amelia_fit$imputations$imp49$edad_al_ing_grupos,
amelia_fit$imputations$imp50$edad_al_ing_grupos,
amelia_fit$imputations$imp51$edad_al_ing_grupos,
amelia_fit$imputations$imp52$edad_al_ing_grupos,
amelia_fit$imputations$imp53$edad_al_ing_grupos,
amelia_fit$imputations$imp54$edad_al_ing_grupos,
amelia_fit$imputations$imp55$edad_al_ing_grupos,
amelia_fit$imputations$imp56$edad_al_ing_grupos,
amelia_fit$imputations$imp57$edad_al_ing_grupos,
amelia_fit$imputations$imp58$edad_al_ing_grupos,
amelia_fit$imputations$imp59$edad_al_ing_grupos,
amelia_fit$imputations$imp60$edad_al_ing_grupos,
amelia_fit$imputations$imp61$edad_al_ing_grupos
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
#18-29 30-39 40-49 50+
janitor::clean_names() %>%
dplyr::arrange(amelia_fit_imputations_imp1_row) %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::summarise(edad_18_29=sum(value == "18-29",na.rm=T),
edad_30_39=sum(value == "30-39",na.rm=T),
edad_40_49=sum(value == "40-49",na.rm=T),
edad_50mas=sum(value =="50+",na.rm=T)) %>%
dplyr::ungroup() %>%
#dplyr::mutate(edad_suma = base::rowSums(dplyr::select(is.na(.),starts_with("edad"))))
dplyr::mutate(ties= base::rowSums(dplyr::select(.,starts_with("edad"))>0)) %>%
dplyr::mutate(edad_al_ing_grupos_imp= dplyr::case_when(
(edad_18_29> edad_30_39) & (edad_18_29> edad_40_49) & (edad_18_29> edad_50mas)~"18-29",
(edad_30_39> edad_18_29) & (edad_30_39> edad_40_49) & (edad_30_39> edad_50mas)~"30-39",
(edad_40_49> edad_18_29) & (edad_40_49> edad_30_39) & (edad_40_49> edad_50mas)~"40-49",
(edad_50mas> edad_18_29) & (edad_50mas> edad_30_39) & (edad_50mas> edad_40_49)~"50+"
))
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
# Reemplazo los valores perdidos:
CONS_C1_df_dup_SEP_2020_women_miss0<-
CONS_C1_df_dup_SEP_2020_women_miss %>%
dplyr::left_join(edad_al_ing_grupos_imputed,by=c("row"="amelia_fit_imputations_imp1_row")) %>%
#si la edad al ingreso no existe, el valor promedio imutado es
dplyr::mutate(edad_al_ing_grupos=dplyr::case_when(is.na(edad_al_ing_grupos)~edad_al_ing_grupos_imp,
T~as.character(edad_al_ing_grupos))) %>%
dplyr::select(-edad_18_29, -edad_30_39, -edad_40_49, -edad_50mas, -ties, -edad_al_ing_grupos_imp)
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss0)-nrow(CONS_C1_df_dup_SEP_2020_women_miss)>0){
warning("AGS: Some rows were added in the imputation")}
After the imputation, there were no missing cases left.
Primary or main substance
Then we imputed the primary/main substance at admission (n= 1).
# Ver distintos valores propuestos para sustancia de inciio
sus_principal_mod_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$sus_principal_mod,
amelia_fit$imputations$imp2$sus_principal_mod,
amelia_fit$imputations$imp3$sus_principal_mod,
amelia_fit$imputations$imp4$sus_principal_mod,
amelia_fit$imputations$imp5$sus_principal_mod,
amelia_fit$imputations$imp6$sus_principal_mod,
amelia_fit$imputations$imp7$sus_principal_mod,
amelia_fit$imputations$imp8$sus_principal_mod,
amelia_fit$imputations$imp9$sus_principal_mod,
amelia_fit$imputations$imp10$sus_principal_mod,
amelia_fit$imputations$imp11$sus_principal_mod,
amelia_fit$imputations$imp12$sus_principal_mod,
amelia_fit$imputations$imp13$sus_principal_mod,
amelia_fit$imputations$imp14$sus_principal_mod,
amelia_fit$imputations$imp15$sus_principal_mod,
amelia_fit$imputations$imp16$sus_principal_mod,
amelia_fit$imputations$imp17$sus_principal_mod,
amelia_fit$imputations$imp18$sus_principal_mod,
amelia_fit$imputations$imp19$sus_principal_mod,
amelia_fit$imputations$imp20$sus_principal_mod,
amelia_fit$imputations$imp21$sus_principal_mod,
amelia_fit$imputations$imp22$sus_principal_mod,
amelia_fit$imputations$imp23$sus_principal_mod,
amelia_fit$imputations$imp24$sus_principal_mod,
amelia_fit$imputations$imp25$sus_principal_mod,
amelia_fit$imputations$imp26$sus_principal_mod,
amelia_fit$imputations$imp27$sus_principal_mod,
amelia_fit$imputations$imp28$sus_principal_mod,
amelia_fit$imputations$imp29$sus_principal_mod,
amelia_fit$imputations$imp30$sus_principal_mod,
amelia_fit$imputations$imp31$sus_principal_mod,
amelia_fit$imputations$imp32$sus_principal_mod,
amelia_fit$imputations$imp33$sus_principal_mod,
amelia_fit$imputations$imp34$sus_principal_mod,
amelia_fit$imputations$imp35$sus_principal_mod,
amelia_fit$imputations$imp36$sus_principal_mod,
amelia_fit$imputations$imp37$sus_principal_mod,
amelia_fit$imputations$imp38$sus_principal_mod,
amelia_fit$imputations$imp39$sus_principal_mod,
amelia_fit$imputations$imp40$sus_principal_mod,
amelia_fit$imputations$imp41$sus_principal_mod,
amelia_fit$imputations$imp42$sus_principal_mod,
amelia_fit$imputations$imp43$sus_principal_mod,
amelia_fit$imputations$imp44$sus_principal_mod,
amelia_fit$imputations$imp45$sus_principal_mod,
amelia_fit$imputations$imp46$sus_principal_mod,
amelia_fit$imputations$imp47$sus_principal_mod,
amelia_fit$imputations$imp48$sus_principal_mod,
amelia_fit$imputations$imp49$sus_principal_mod,
amelia_fit$imputations$imp50$sus_principal_mod,
amelia_fit$imputations$imp51$sus_principal_mod,
amelia_fit$imputations$imp52$sus_principal_mod,
amelia_fit$imputations$imp53$sus_principal_mod,
amelia_fit$imputations$imp54$sus_principal_mod,
amelia_fit$imputations$imp55$sus_principal_mod,
amelia_fit$imputations$imp56$sus_principal_mod,
amelia_fit$imputations$imp57$sus_principal_mod,
amelia_fit$imputations$imp58$sus_principal_mod,
amelia_fit$imputations$imp59$sus_principal_mod,
amelia_fit$imputations$imp60$sus_principal_mod,
amelia_fit$imputations$imp61$sus_principal_mod
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
#18-29 30-39 40-49 50+
janitor::clean_names() %>%
dplyr::arrange(amelia_fit_imputations_imp1_row) %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::summarise(sus_prin_mar=sum(value == "Marijuana",na.rm=T),
sus_prin_oh=sum(value == "Alcohol",na.rm=T),
sus_prin_pb=sum(value == "Cocaine paste",na.rm=T),
sus_prin_coc=sum(value =="Cocaine hydrochloride",na.rm=T),
sus_prin_other=sum(value =="Other",na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::mutate(ties= base::rowSums(dplyr::select(.,starts_with("sus_prin_"))>0)) %>%
dplyr::mutate(sus_principal_mod_imp= dplyr::case_when(
(sus_prin_mar> sus_prin_oh)& (sus_prin_mar> sus_prin_pb)& (sus_prin_mar> sus_prin_coc)& (sus_prin_mar> sus_prin_other)~"Marijuana",
(sus_prin_oh> sus_prin_mar)& (sus_prin_oh> sus_prin_pb)& (sus_prin_oh> sus_prin_coc)& (sus_prin_oh> sus_prin_other)~"Alcohol",
(sus_prin_pb> sus_prin_mar)& (sus_prin_pb> sus_prin_oh)& (sus_prin_pb> sus_prin_coc)& (sus_prin_pb> sus_prin_other)~"Cocaine paste",
(sus_prin_coc> sus_prin_mar)& (sus_prin_coc> sus_prin_oh)& (sus_prin_coc> sus_prin_pb)& (sus_prin_coc> sus_prin_other)~"Cocaine hydrochloride",
(sus_prin_other> sus_prin_mar)& (sus_prin_other> sus_prin_oh)& (sus_prin_other> sus_prin_pb)& (sus_prin_other> sus_prin_coc)~"Cocaine hydrochloride"
))
## `summarise()` ungrouping output (override with `.groups` argument)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_women_miss1<-
CONS_C1_df_dup_SEP_2020_women_miss0 %>%
dplyr::left_join(sus_principal_mod_imputed, by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(sus_principal_mod=factor(dplyr::case_when(is.na(sus_principal_mod)~as.character(sus_principal_mod_imp),
TRUE~as.character(sus_principal_mod)))) %>%
dplyr::select(-c(sus_prin_mar, sus_prin_oh, sus_prin_pb, sus_prin_coc, sus_prin_other, ties, sus_principal_mod_imp)) %>%
data.table()
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss1)-nrow(CONS_C1_df_dup_SEP_2020_women_miss0)>0){
warning("AGS: Some rows were added in the imputation")}
As a result of the imputations, there were no missing values.
Co-occurring SUD
Another variable worth imputing is the presence of additional Substance Use Disorders (n= 1).
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
num_otras_sus_mod_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$num_otras_sus_mod,
amelia_fit$imputations$imp2$num_otras_sus_mod,
amelia_fit$imputations$imp3$num_otras_sus_mod,
amelia_fit$imputations$imp4$num_otras_sus_mod,
amelia_fit$imputations$imp5$num_otras_sus_mod,
amelia_fit$imputations$imp6$num_otras_sus_mod,
amelia_fit$imputations$imp7$num_otras_sus_mod,
amelia_fit$imputations$imp8$num_otras_sus_mod,
amelia_fit$imputations$imp9$num_otras_sus_mod,
amelia_fit$imputations$imp10$num_otras_sus_mod,
amelia_fit$imputations$imp11$num_otras_sus_mod,
amelia_fit$imputations$imp12$num_otras_sus_mod,
amelia_fit$imputations$imp13$num_otras_sus_mod,
amelia_fit$imputations$imp14$num_otras_sus_mod,
amelia_fit$imputations$imp15$num_otras_sus_mod,
amelia_fit$imputations$imp16$num_otras_sus_mod,
amelia_fit$imputations$imp17$num_otras_sus_mod,
amelia_fit$imputations$imp18$num_otras_sus_mod,
amelia_fit$imputations$imp19$num_otras_sus_mod,
amelia_fit$imputations$imp20$num_otras_sus_mod,
amelia_fit$imputations$imp21$num_otras_sus_mod,
amelia_fit$imputations$imp22$num_otras_sus_mod,
amelia_fit$imputations$imp23$num_otras_sus_mod,
amelia_fit$imputations$imp24$num_otras_sus_mod,
amelia_fit$imputations$imp25$num_otras_sus_mod,
amelia_fit$imputations$imp26$num_otras_sus_mod,
amelia_fit$imputations$imp27$num_otras_sus_mod,
amelia_fit$imputations$imp28$num_otras_sus_mod,
amelia_fit$imputations$imp29$num_otras_sus_mod,
amelia_fit$imputations$imp30$num_otras_sus_mod,
amelia_fit$imputations$imp31$num_otras_sus_mod,
amelia_fit$imputations$imp32$num_otras_sus_mod,
amelia_fit$imputations$imp33$num_otras_sus_mod,
amelia_fit$imputations$imp34$num_otras_sus_mod,
amelia_fit$imputations$imp35$num_otras_sus_mod,
amelia_fit$imputations$imp36$num_otras_sus_mod,
amelia_fit$imputations$imp37$num_otras_sus_mod,
amelia_fit$imputations$imp38$num_otras_sus_mod,
amelia_fit$imputations$imp39$num_otras_sus_mod,
amelia_fit$imputations$imp40$num_otras_sus_mod,
amelia_fit$imputations$imp41$num_otras_sus_mod,
amelia_fit$imputations$imp42$num_otras_sus_mod,
amelia_fit$imputations$imp43$num_otras_sus_mod,
amelia_fit$imputations$imp44$num_otras_sus_mod,
amelia_fit$imputations$imp45$num_otras_sus_mod,
amelia_fit$imputations$imp46$num_otras_sus_mod,
amelia_fit$imputations$imp47$num_otras_sus_mod,
amelia_fit$imputations$imp48$num_otras_sus_mod,
amelia_fit$imputations$imp49$num_otras_sus_mod,
amelia_fit$imputations$imp50$num_otras_sus_mod,
amelia_fit$imputations$imp51$num_otras_sus_mod,
amelia_fit$imputations$imp52$num_otras_sus_mod,
amelia_fit$imputations$imp53$num_otras_sus_mod,
amelia_fit$imputations$imp54$num_otras_sus_mod,
amelia_fit$imputations$imp55$num_otras_sus_mod,
amelia_fit$imputations$imp56$num_otras_sus_mod,
amelia_fit$imputations$imp57$num_otras_sus_mod,
amelia_fit$imputations$imp58$num_otras_sus_mod,
amelia_fit$imputations$imp59$num_otras_sus_mod,
amelia_fit$imputations$imp60$num_otras_sus_mod,
amelia_fit$imputations$imp61$num_otras_sus_mod
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::arrange(amelia_fit_imputations_imp1_row) %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::summarise(no_ad_subs=sum(value == "No additional substance",na.rm=T),
one_ad_subs=sum(value == "One additional substance",na.rm=T),
more_one_ad_subs=sum(value == "More than one additional substance",na.rm=T)) %>%
dplyr::ungroup() %>%
# Hacer la variable imputada
dplyr::mutate(num_otras_sus_mod_imp= dplyr::case_when(
(no_ad_subs>one_ad_subs)&(no_ad_subs>more_one_ad_subs)~"No additional substance",
(one_ad_subs>no_ad_subs)&(one_ad_subs>more_one_ad_subs)~"One additional substance",
(more_one_ad_subs>no_ad_subs)&(more_one_ad_subs>one_ad_subs)~"More than one additional substance",
T~NA_character_))
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
CONS_C1_df_dup_SEP_2020_women_miss2<-
CONS_C1_df_dup_SEP_2020_women_miss1 %>%
dplyr::left_join(num_otras_sus_mod_imputed[,c("amelia_fit_imputations_imp1_row","num_otras_sus_mod_imp")],
by=c("row"="amelia_fit_imputations_imp1_row")) %>%
#si la edad al ingreso no existe, el valor promedio imutado es
dplyr::mutate(num_otras_sus_mod=
dplyr::case_when(is.na(num_otras_sus_mod)~num_otras_sus_mod_imp,
T~as.character(num_otras_sus_mod))) %>%
dplyr::select(-num_otras_sus_mod_imp)
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#is.na(edad_ini_cons) & is.na(edad_ini_sus_prin) & is.na(min_edad_al_ing)~as.numeric(avg),
#table(is.na(CONS_C1_df_dup_SEP_2020_women_miss1$edad_ini_cons))
paste0("Number of rows with values that did not fulfilled the conditions: ",CONS_C1_df_dup_SEP_2020_women_miss2 %>% dplyr::filter(is.na(num_otras_sus_mod)) %>%
dplyr::select(hash_key, edad_al_ing_grupos,num_otras_sus_mod) %>% nrow())
## [1] "Number of rows with values that did not fulfilled the conditions: 0"
#Lo importante es tener en cuenta que las imputaciones se hicieron por filas; no, en cambio, ahora debemos reemplazar aquellos casos que tienen perdidos (no cumplieron con las condiciones) con el valor mínimo
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss2)-nrow(CONS_C1_df_dup_SEP_2020_women_miss1)>0){
warning("AGS: Some rows were added in the imputation")}
As a result of the imputations, there were 0 values of co-occurring SUDs available.
Frequency of Use of the Primary Substance at Admission
Another variable that is worth imputing is the Frequency of use of primary drug at admission (n= 116). In case of ties, we selected the imputed values with the value with the most frequent drug use.
# Ver distintos valores propuestos para sustancia de inciio
freq_cons_sus_prin_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$freq_cons_sus_prin,
amelia_fit$imputations$imp2$freq_cons_sus_prin,
amelia_fit$imputations$imp3$freq_cons_sus_prin,
amelia_fit$imputations$imp4$freq_cons_sus_prin,
amelia_fit$imputations$imp5$freq_cons_sus_prin,
amelia_fit$imputations$imp6$freq_cons_sus_prin,
amelia_fit$imputations$imp7$freq_cons_sus_prin,
amelia_fit$imputations$imp8$freq_cons_sus_prin,
amelia_fit$imputations$imp9$freq_cons_sus_prin,
amelia_fit$imputations$imp10$freq_cons_sus_prin,
amelia_fit$imputations$imp11$freq_cons_sus_prin,
amelia_fit$imputations$imp12$freq_cons_sus_prin,
amelia_fit$imputations$imp13$freq_cons_sus_prin,
amelia_fit$imputations$imp14$freq_cons_sus_prin,
amelia_fit$imputations$imp15$freq_cons_sus_prin,
amelia_fit$imputations$imp16$freq_cons_sus_prin,
amelia_fit$imputations$imp17$freq_cons_sus_prin,
amelia_fit$imputations$imp18$freq_cons_sus_prin,
amelia_fit$imputations$imp19$freq_cons_sus_prin,
amelia_fit$imputations$imp20$freq_cons_sus_prin,
amelia_fit$imputations$imp21$freq_cons_sus_prin,
amelia_fit$imputations$imp22$freq_cons_sus_prin,
amelia_fit$imputations$imp23$freq_cons_sus_prin,
amelia_fit$imputations$imp24$freq_cons_sus_prin,
amelia_fit$imputations$imp25$freq_cons_sus_prin,
amelia_fit$imputations$imp26$freq_cons_sus_prin,
amelia_fit$imputations$imp27$freq_cons_sus_prin,
amelia_fit$imputations$imp28$freq_cons_sus_prin,
amelia_fit$imputations$imp29$freq_cons_sus_prin,
amelia_fit$imputations$imp30$freq_cons_sus_prin,
amelia_fit$imputations$imp31$freq_cons_sus_prin,
amelia_fit$imputations$imp32$freq_cons_sus_prin,
amelia_fit$imputations$imp33$freq_cons_sus_prin,
amelia_fit$imputations$imp34$freq_cons_sus_prin,
amelia_fit$imputations$imp35$freq_cons_sus_prin,
amelia_fit$imputations$imp36$freq_cons_sus_prin,
amelia_fit$imputations$imp37$freq_cons_sus_prin,
amelia_fit$imputations$imp38$freq_cons_sus_prin,
amelia_fit$imputations$imp39$freq_cons_sus_prin,
amelia_fit$imputations$imp40$freq_cons_sus_prin,
amelia_fit$imputations$imp41$freq_cons_sus_prin,
amelia_fit$imputations$imp42$freq_cons_sus_prin,
amelia_fit$imputations$imp43$freq_cons_sus_prin,
amelia_fit$imputations$imp44$freq_cons_sus_prin,
amelia_fit$imputations$imp45$freq_cons_sus_prin,
amelia_fit$imputations$imp46$freq_cons_sus_prin,
amelia_fit$imputations$imp47$freq_cons_sus_prin,
amelia_fit$imputations$imp48$freq_cons_sus_prin,
amelia_fit$imputations$imp49$freq_cons_sus_prin,
amelia_fit$imputations$imp50$freq_cons_sus_prin,
amelia_fit$imputations$imp51$freq_cons_sus_prin,
amelia_fit$imputations$imp52$freq_cons_sus_prin,
amelia_fit$imputations$imp53$freq_cons_sus_prin,
amelia_fit$imputations$imp54$freq_cons_sus_prin,
amelia_fit$imputations$imp55$freq_cons_sus_prin,
amelia_fit$imputations$imp56$freq_cons_sus_prin,
amelia_fit$imputations$imp57$freq_cons_sus_prin,
amelia_fit$imputations$imp58$freq_cons_sus_prin,
amelia_fit$imputations$imp59$freq_cons_sus_prin,
amelia_fit$imputations$imp60$freq_cons_sus_prin,
amelia_fit$imputations$imp61$freq_cons_sus_prin
)
freq_cons_sus_prin_imputed<-
freq_cons_sus_prin_imputed %>%
data.frame() %>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("1 day a week or more",as.character(.))~1,TRUE~0), .names="1_day_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("2 to 3 days a week",as.character(.))~1,TRUE~0), .names="2_3_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("4 to 6 days a week",as.character(.))~1,TRUE~0), .names="4_6_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Less than 1 day a week",as.character(.))~1,TRUE~0), .names="less_1_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Did not use",as.character(.))~1,TRUE~0), .names="did_not_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Daily",as.character(.))~1,TRUE~0), .names="daily_{col}"))%>%
dplyr::mutate(freq_cons_sus_prin_daily = base::rowSums(dplyr::select(., starts_with("daily_")))) %>%
dplyr::mutate(freq_cons_sus_prin_4_6 = base::rowSums(dplyr::select(., starts_with("4_6_"))))%>%
dplyr::mutate(freq_cons_sus_prin_2_3 = base::rowSums(dplyr::select(., starts_with("2_3_"))))%>%
dplyr::mutate(freq_cons_sus_prin_1_day = base::rowSums(dplyr::select(., starts_with("1_day_"))))%>%
dplyr::mutate(freq_cons_sus_prin_less_1 = base::rowSums(dplyr::select(., starts_with("less_1_"))))%>%
dplyr::mutate(freq_cons_sus_prin_did_not = base::rowSums(dplyr::select(., starts_with("did_not_")))) %>%
#dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_1_day>0~1,TRUE~0)) %>%
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_2_3>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>%
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_4_6>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>%
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_less_1>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>%
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_did_not>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>%
dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_daily>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>%
#hierarchy
dplyr::mutate(freq_cons_sus_prin_to_imputation=
dplyr::case_when(freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_daily>0~"Daily",
freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_4_6>0~"4 to 6 days a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_2_3>0~"2 to 3 days a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_1_day>0~"1 day a week or more",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_less_1>0~"Less than 1 day a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_did_not>0~"Did not use",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_daily>0~"Daily",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_4_6>0~"4 to 6 days a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_2_3>0~"2 to 3 days a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_1_day>0~"1 day a week or more",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_less_1>0~"Less than 1 day a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_did_not>0~"Did not use")) %>%
janitor::clean_names()
freq_cons_sus_prin_imputed<-
dplyr::select(freq_cons_sus_prin_imputed,amelia_fit_imputations_imp1_row,freq_cons_sus_prin_to_imputation)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_women_miss3<-
CONS_C1_df_dup_SEP_2020_women_miss2 %>%
dplyr::left_join(freq_cons_sus_prin_imputed, by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(freq_cons_sus_prin=factor(dplyr::case_when(is.na(freq_cons_sus_prin)~as.character(freq_cons_sus_prin_to_imputation), TRUE~as.character(freq_cons_sus_prin)))) %>%
dplyr::select(-freq_cons_sus_prin_to_imputation) %>%
data.table()
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss3)-nrow(CONS_C1_df_dup_SEP_2020_women_miss2)>0){
warning("AGS: Some rows were added in the imputation")}
As a result of the imputations, there were no missing values.
Educational Attainment
Another variable that is worth imputing is the Educational Attainment (n= 115). In case of ties,w imputed for the most vulnerable category among the candidates.
# Ver distintos valores propuestos para sustancia de inciio
escolaridad_rec_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$escolaridad_rec,
amelia_fit$imputations$imp2$escolaridad_rec,
amelia_fit$imputations$imp3$escolaridad_rec,
amelia_fit$imputations$imp4$escolaridad_rec,
amelia_fit$imputations$imp5$escolaridad_rec,
amelia_fit$imputations$imp6$escolaridad_rec,
amelia_fit$imputations$imp7$escolaridad_rec,
amelia_fit$imputations$imp8$escolaridad_rec,
amelia_fit$imputations$imp9$escolaridad_rec,
amelia_fit$imputations$imp10$escolaridad_rec,
amelia_fit$imputations$imp11$escolaridad_rec,
amelia_fit$imputations$imp12$escolaridad_rec,
amelia_fit$imputations$imp13$escolaridad_rec,
amelia_fit$imputations$imp14$escolaridad_rec,
amelia_fit$imputations$imp15$escolaridad_rec,
amelia_fit$imputations$imp16$escolaridad_rec,
amelia_fit$imputations$imp17$escolaridad_rec,
amelia_fit$imputations$imp18$escolaridad_rec,
amelia_fit$imputations$imp19$escolaridad_rec,
amelia_fit$imputations$imp20$escolaridad_rec,
amelia_fit$imputations$imp21$escolaridad_rec,
amelia_fit$imputations$imp22$escolaridad_rec,
amelia_fit$imputations$imp23$escolaridad_rec,
amelia_fit$imputations$imp24$escolaridad_rec,
amelia_fit$imputations$imp25$escolaridad_rec,
amelia_fit$imputations$imp26$escolaridad_rec,
amelia_fit$imputations$imp27$escolaridad_rec,
amelia_fit$imputations$imp28$escolaridad_rec,
amelia_fit$imputations$imp29$escolaridad_rec,
amelia_fit$imputations$imp30$escolaridad_rec,
amelia_fit$imputations$imp31$escolaridad_rec,
amelia_fit$imputations$imp32$escolaridad_rec,
amelia_fit$imputations$imp33$escolaridad_rec,
amelia_fit$imputations$imp34$escolaridad_rec,
amelia_fit$imputations$imp35$escolaridad_rec,
amelia_fit$imputations$imp36$escolaridad_rec,
amelia_fit$imputations$imp37$escolaridad_rec,
amelia_fit$imputations$imp38$escolaridad_rec,
amelia_fit$imputations$imp39$escolaridad_rec,
amelia_fit$imputations$imp40$escolaridad_rec,
amelia_fit$imputations$imp41$escolaridad_rec,
amelia_fit$imputations$imp42$escolaridad_rec,
amelia_fit$imputations$imp43$escolaridad_rec,
amelia_fit$imputations$imp44$escolaridad_rec,
amelia_fit$imputations$imp45$escolaridad_rec,
amelia_fit$imputations$imp46$escolaridad_rec,
amelia_fit$imputations$imp47$escolaridad_rec,
amelia_fit$imputations$imp48$escolaridad_rec,
amelia_fit$imputations$imp49$escolaridad_rec,
amelia_fit$imputations$imp50$escolaridad_rec,
amelia_fit$imputations$imp51$escolaridad_rec,
amelia_fit$imputations$imp52$escolaridad_rec,
amelia_fit$imputations$imp53$escolaridad_rec,
amelia_fit$imputations$imp54$escolaridad_rec,
amelia_fit$imputations$imp55$escolaridad_rec,
amelia_fit$imputations$imp56$escolaridad_rec,
amelia_fit$imputations$imp57$escolaridad_rec,
amelia_fit$imputations$imp58$escolaridad_rec,
amelia_fit$imputations$imp59$escolaridad_rec,
amelia_fit$imputations$imp60$escolaridad_rec,
amelia_fit$imputations$imp61$escolaridad_rec) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::arrange(amelia_fit_imputations_imp1_row) %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
# 1-Mild 2-Moderate 3-Severe
dplyr::summarise(primary_3=sum(value == "3-Completed primary school or less",na.rm=T),
high_sc_2=sum(value == "2-Completed high school or less",na.rm=T),
more_h_sc_1=sum(value =="1-More than high school",na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::mutate(escolaridad_rec_imp= dplyr::case_when(
(more_h_sc_1>primary_3) & (more_h_sc_1>high_sc_2)~"1-More than high school",
(high_sc_2>primary_3) & (high_sc_2>more_h_sc_1)~"2-Completed high school or less",
(primary_3>more_h_sc_1) & (primary_3>high_sc_2)~"3-Completed primary school or less"
)) %>%
#2) Resolve ties
dplyr::mutate(ties= dplyr::case_when(is.na(escolaridad_rec_imp)~1,T~0)) %>%
dplyr::mutate(escolaridad_rec_imp= dplyr::case_when(ties==1 & ((primary_3>more_h_sc_1)|(primary_3>high_sc_2))~"3-Completed primary school or less", ties==1 & ((high_sc_2>primary_3)|(high_sc_2>more_h_sc_1))~"2-Completed high school or less",
T~escolaridad_rec_imp))
## `summarise()` ungrouping output (override with `.groups` argument)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(motivodeegreso_mod_imp,evaluacindelprocesoteraputico)
CONS_C1_df_dup_SEP_2020_women_miss4<-
CONS_C1_df_dup_SEP_2020_women_miss3 %>%
dplyr::left_join(escolaridad_rec_imputed[,c("amelia_fit_imputations_imp1_row","escolaridad_rec_imp")], by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(escolaridad_rec=factor(dplyr::case_when(is.na(escolaridad_rec)~ escolaridad_rec_imp,
TRUE~as.character(escolaridad_rec)))) %>%
dplyr::mutate(escolaridad_rec=parse_factor(as.character(escolaridad_rec),levels=c("1-More than high school", "2-Completed high school or less","3-Completed primary school or less"), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "UTF-8"))) %>%
dplyr::select(-escolaridad_rec_imp) %>%
data.table()
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss4)-nrow(CONS_C1_df_dup_SEP_2020_women_miss3)>0){
warning("AGS: Some rows were added in the imputation")}
We ended having 0 missing values in educational attainment.
Marital status
Additionally, we replaced missing values of the marital status (n=34). Since different marital status were not clearly more vulnerable between each other, we selected the most frequent imputed value among the different imputed databases. Only in case of ties in the candidate values, we resolved them by discarding “married” status, which could be somehow less vulnerable than other categories.
# Ver distintos valores propuestos para estado conyugal
estado_conyugal_2_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$estado_conyugal_2,
amelia_fit$imputations$imp2$estado_conyugal_2,
amelia_fit$imputations$imp3$estado_conyugal_2,
amelia_fit$imputations$imp4$estado_conyugal_2,
amelia_fit$imputations$imp5$estado_conyugal_2,
amelia_fit$imputations$imp6$estado_conyugal_2,
amelia_fit$imputations$imp7$estado_conyugal_2,
amelia_fit$imputations$imp8$estado_conyugal_2,
amelia_fit$imputations$imp9$estado_conyugal_2,
amelia_fit$imputations$imp10$estado_conyugal_2,
amelia_fit$imputations$imp11$estado_conyugal_2,
amelia_fit$imputations$imp12$estado_conyugal_2,
amelia_fit$imputations$imp13$estado_conyugal_2,
amelia_fit$imputations$imp14$estado_conyugal_2,
amelia_fit$imputations$imp15$estado_conyugal_2,
amelia_fit$imputations$imp16$estado_conyugal_2,
amelia_fit$imputations$imp17$estado_conyugal_2,
amelia_fit$imputations$imp18$estado_conyugal_2,
amelia_fit$imputations$imp19$estado_conyugal_2,
amelia_fit$imputations$imp20$estado_conyugal_2,
amelia_fit$imputations$imp21$estado_conyugal_2,
amelia_fit$imputations$imp22$estado_conyugal_2,
amelia_fit$imputations$imp23$estado_conyugal_2,
amelia_fit$imputations$imp24$estado_conyugal_2,
amelia_fit$imputations$imp25$estado_conyugal_2,
amelia_fit$imputations$imp26$estado_conyugal_2,
amelia_fit$imputations$imp27$estado_conyugal_2,
amelia_fit$imputations$imp28$estado_conyugal_2,
amelia_fit$imputations$imp29$estado_conyugal_2,
amelia_fit$imputations$imp30$estado_conyugal_2,
amelia_fit$imputations$imp31$estado_conyugal_2,
amelia_fit$imputations$imp32$estado_conyugal_2,
amelia_fit$imputations$imp33$estado_conyugal_2,
amelia_fit$imputations$imp34$estado_conyugal_2,
amelia_fit$imputations$imp35$estado_conyugal_2,
amelia_fit$imputations$imp36$estado_conyugal_2,
amelia_fit$imputations$imp37$estado_conyugal_2,
amelia_fit$imputations$imp38$estado_conyugal_2,
amelia_fit$imputations$imp39$estado_conyugal_2,
amelia_fit$imputations$imp40$estado_conyugal_2,
amelia_fit$imputations$imp41$estado_conyugal_2,
amelia_fit$imputations$imp42$estado_conyugal_2,
amelia_fit$imputations$imp43$estado_conyugal_2,
amelia_fit$imputations$imp44$estado_conyugal_2,
amelia_fit$imputations$imp45$estado_conyugal_2,
amelia_fit$imputations$imp46$estado_conyugal_2,
amelia_fit$imputations$imp47$estado_conyugal_2,
amelia_fit$imputations$imp48$estado_conyugal_2,
amelia_fit$imputations$imp49$estado_conyugal_2,
amelia_fit$imputations$imp50$estado_conyugal_2,
amelia_fit$imputations$imp51$estado_conyugal_2,
amelia_fit$imputations$imp52$estado_conyugal_2,
amelia_fit$imputations$imp53$estado_conyugal_2,
amelia_fit$imputations$imp54$estado_conyugal_2,
amelia_fit$imputations$imp55$estado_conyugal_2,
amelia_fit$imputations$imp56$estado_conyugal_2,
amelia_fit$imputations$imp57$estado_conyugal_2,
amelia_fit$imputations$imp58$estado_conyugal_2,
amelia_fit$imputations$imp59$estado_conyugal_2,
amelia_fit$imputations$imp60$estado_conyugal_2,
amelia_fit$imputations$imp61$estado_conyugal_2
)
estado_conyugal_2_imputed<-
estado_conyugal_2_imputed %>%
data.frame() %>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Married/Shared living arrangements",as.character(.))~1,TRUE~0), .names="married_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Separated/Divorced",as.character(.))~1,TRUE~0), .names="sep_div_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Single",as.character(.))~1,TRUE~0), .names="singl_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Widower",as.character(.))~1,TRUE~0), .names="widow_{col}"))%>%
dplyr::mutate(estado_conyugal_2_married = base::rowSums(dplyr::select(., starts_with("married_"))))%>%
dplyr::mutate(estado_conyugal_2_sep_div = base::rowSums(dplyr::select(., starts_with("sep_div_"))))%>%
dplyr::mutate(estado_conyugal_2_singl = base::rowSums(dplyr::select(., starts_with("singl_"))))%>%
dplyr::mutate(estado_conyugal_2_wid = base::rowSums(dplyr::select(., starts_with("widow_"))))%>%
#dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_married>0~1,TRUE~0)) %>%
dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_sep_div>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>%
dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_singl>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>%
dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_wid>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>%
janitor::clean_names()
estado_conyugal_2_imputed_cat_est_cony<-
estado_conyugal_2_imputed %>%
tidyr::pivot_longer(c(estado_conyugal_2_married, estado_conyugal_2_sep_div, estado_conyugal_2_singl, estado_conyugal_2_wid), names_to = "cat_est_conyugal", values_to = "count") %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(estado_conyugal_2_imputed_max=max(count,na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::filter(estado_conyugal_2_imputed_max==count) %>%
dplyr::select(amelia_fit_imputations_imp1_row,cat_est_conyugal,count) %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(n_row=n()) %>%
dplyr::ungroup() %>%
dplyr::mutate(cat_est_conyugal=dplyr::case_when(n_row>1~NA_character_,
TRUE~cat_est_conyugal)) %>%
dplyr::distinct(amelia_fit_imputations_imp1_row,.keep_all = T)
estado_conyugal_2_imputed<-
estado_conyugal_2_imputed %>%
dplyr::left_join(estado_conyugal_2_imputed_cat_est_cony, by="amelia_fit_imputations_imp1_row") %>%
dplyr::mutate(cat_est_conyugal=dplyr::case_when(cat_est_conyugal=="estado_conyugal_2_married"~"Married/Shared living arrangements",cat_est_conyugal=="estado_conyugal_2_sep_div"~"Separated/Divorced",cat_est_conyugal=="estado_conyugal_2_singl"~"Single",cat_est_conyugal=="estado_conyugal_2_wid"~"Widower"
))%>%
janitor::clean_names()
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_women_miss5_prev<-
CONS_C1_df_dup_SEP_2020_women_miss4 %>%
dplyr::left_join(dplyr::select(estado_conyugal_2_imputed,amelia_fit_imputations_imp1_row,cat_est_conyugal), by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(estado_conyugal_2=factor(dplyr::case_when(is.na(estado_conyugal_2)~as.character(cat_est_conyugal),TRUE~as.character(estado_conyugal_2)))) %>%
dplyr::select(-cat_est_conyugal) %>%
data.table()
# casos problemáticos de matrimonio c(59664, 17582, 161721, 36520)
no_calzaron_estado_cony<-
CONS_C1_df_dup_SEP_2020_women_miss5_prev %>% dplyr::filter(is.na(estado_conyugal_2)) %>% dplyr::distinct(row) %>% unlist()
estado_conyugal_2_imputed2<-
estado_conyugal_2_imputed %>%
dplyr::filter(amelia_fit_imputations_imp1_row %in% no_calzaron_estado_cony) %>%
dplyr::select(amelia_fit_imputations_imp1_row, estado_conyugal_2_married, estado_conyugal_2_sep_div,estado_conyugal_2_singl, estado_conyugal_2_wid, estado_conyugal_2_tot, cat_est_conyugal) %>%
melt(id.vars="amelia_fit_imputations_imp1_row") %>%
dplyr::mutate(value=as.numeric(value)) %>%
dplyr::arrange(amelia_fit_imputations_imp1_row) %>%
dplyr::filter(value!="cat_est_conyugal") %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
slice_max(value, with_ties = T) %>%
dplyr::filter(variable!="estado_conyugal_2_married") %>%
dplyr::left_join(CONS_C1_df_dup_SEP_2020_women[,c("row","edad_al_ing")], by=c("amelia_fit_imputations_imp1_row"="row")) %>%
dplyr::mutate(value=dplyr::case_when(variable=="estado_conyugal_2_sep_div"~value*10,
T~value)) %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
slice_max(value, with_ties = T) %>%
dplyr::ungroup() %>%
dplyr::mutate(marital_status_imp=dplyr::case_when(grepl("_singl",variable)~"Single",
grepl("_sep_div",variable)~"Separated/Divorced",
grepl("_married",variable)~"Married/Shared living arrangements",
grepl("_wid",variable)~"Widower"
))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#2nd round of imputation for ties
CONS_C1_df_dup_SEP_2020_women_miss5<-
CONS_C1_df_dup_SEP_2020_women_miss5_prev %>%
dplyr::left_join(dplyr::select(estado_conyugal_2_imputed2,amelia_fit_imputations_imp1_row,marital_status_imp), by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(estado_conyugal_2=factor(dplyr::case_when(is.na(estado_conyugal_2)~as.character(marital_status_imp),TRUE~as.character(estado_conyugal_2)))) %>%
dplyr::select(-marital_status_imp) %>%
data.table()
#CONS_C1_df_dup_SEP_2020_women_miss5 %>%
#dplyr::filter(hash_key %in% CONS_C1_df_dup_SEP_2020_women_miss5 %>% dplyr::filter(is.na(estado_conyugal_2)) %>% dplyr::distinct(hash_key) %>% unlist())
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss5)-nrow(CONS_C1_df_dup_SEP_2020_women_miss4)>0){
warning("AGS: Some rows were added in the imputation")}
We could not resolve Marital status in 0 cases due to ties in the most frequent values.
Cause of Discharge
We looked over possible imputations to the truly missing values, discarding missing values due to censorship (n=3). In case of ties, we replace with the more vulnerable value.
motivo_de_egreso_a_imputar<-
CONS_C1_df_dup_SEP_2020_women_miss %>% dplyr::filter(is.na(motivodeegreso_mod_imp)) %>% dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,fech_egres_imp)) %>% dplyr::filter(!is.na(fech_egres_imp))%>%dplyr::select(row)
## Joining, by = "row"
#CONS_C1_df_dup_SEP_2020 %>% dplyr::filter(is.na(motivodeegreso_mod_imp)) %>%
# dplyr::select(row, hash_key, motivodeegreso_mod_imp, fech_egres_imp)
# dplyr::filter(fech_egres_imp=="2019-11-13")
motivodeegreso_mod_imp_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$motivodeegreso_mod_imp,
amelia_fit$imputations$imp2$motivodeegreso_mod_imp,
amelia_fit$imputations$imp3$motivodeegreso_mod_imp,
amelia_fit$imputations$imp4$motivodeegreso_mod_imp,
amelia_fit$imputations$imp5$motivodeegreso_mod_imp,
amelia_fit$imputations$imp6$motivodeegreso_mod_imp,
amelia_fit$imputations$imp7$motivodeegreso_mod_imp,
amelia_fit$imputations$imp8$motivodeegreso_mod_imp,
amelia_fit$imputations$imp9$motivodeegreso_mod_imp,
amelia_fit$imputations$imp10$motivodeegreso_mod_imp,
amelia_fit$imputations$imp11$motivodeegreso_mod_imp,
amelia_fit$imputations$imp12$motivodeegreso_mod_imp,
amelia_fit$imputations$imp13$motivodeegreso_mod_imp,
amelia_fit$imputations$imp14$motivodeegreso_mod_imp,
amelia_fit$imputations$imp15$motivodeegreso_mod_imp,
amelia_fit$imputations$imp16$motivodeegreso_mod_imp,
amelia_fit$imputations$imp17$motivodeegreso_mod_imp,
amelia_fit$imputations$imp18$motivodeegreso_mod_imp,
amelia_fit$imputations$imp19$motivodeegreso_mod_imp,
amelia_fit$imputations$imp20$motivodeegreso_mod_imp,
amelia_fit$imputations$imp21$motivodeegreso_mod_imp,
amelia_fit$imputations$imp22$motivodeegreso_mod_imp,
amelia_fit$imputations$imp23$motivodeegreso_mod_imp,
amelia_fit$imputations$imp24$motivodeegreso_mod_imp,
amelia_fit$imputations$imp25$motivodeegreso_mod_imp,
amelia_fit$imputations$imp26$motivodeegreso_mod_imp,
amelia_fit$imputations$imp27$motivodeegreso_mod_imp,
amelia_fit$imputations$imp28$motivodeegreso_mod_imp,
amelia_fit$imputations$imp29$motivodeegreso_mod_imp,
amelia_fit$imputations$imp30$motivodeegreso_mod_imp,
amelia_fit$imputations$imp31$motivodeegreso_mod_imp,
amelia_fit$imputations$imp32$motivodeegreso_mod_imp,
amelia_fit$imputations$imp33$motivodeegreso_mod_imp,
amelia_fit$imputations$imp34$motivodeegreso_mod_imp,
amelia_fit$imputations$imp35$motivodeegreso_mod_imp,
amelia_fit$imputations$imp36$motivodeegreso_mod_imp,
amelia_fit$imputations$imp37$motivodeegreso_mod_imp,
amelia_fit$imputations$imp38$motivodeegreso_mod_imp,
amelia_fit$imputations$imp39$motivodeegreso_mod_imp,
amelia_fit$imputations$imp40$motivodeegreso_mod_imp,
amelia_fit$imputations$imp41$motivodeegreso_mod_imp,
amelia_fit$imputations$imp42$motivodeegreso_mod_imp,
amelia_fit$imputations$imp43$motivodeegreso_mod_imp,
amelia_fit$imputations$imp44$motivodeegreso_mod_imp,
amelia_fit$imputations$imp45$motivodeegreso_mod_imp,
amelia_fit$imputations$imp46$motivodeegreso_mod_imp,
amelia_fit$imputations$imp47$motivodeegreso_mod_imp,
amelia_fit$imputations$imp48$motivodeegreso_mod_imp,
amelia_fit$imputations$imp49$motivodeegreso_mod_imp,
amelia_fit$imputations$imp50$motivodeegreso_mod_imp,
amelia_fit$imputations$imp51$motivodeegreso_mod_imp,
amelia_fit$imputations$imp52$motivodeegreso_mod_imp,
amelia_fit$imputations$imp53$motivodeegreso_mod_imp,
amelia_fit$imputations$imp54$motivodeegreso_mod_imp,
amelia_fit$imputations$imp55$motivodeegreso_mod_imp,
amelia_fit$imputations$imp56$motivodeegreso_mod_imp,
amelia_fit$imputations$imp57$motivodeegreso_mod_imp,
amelia_fit$imputations$imp58$motivodeegreso_mod_imp,
amelia_fit$imputations$imp59$motivodeegreso_mod_imp,
amelia_fit$imputations$imp60$motivodeegreso_mod_imp,
amelia_fit$imputations$imp61$motivodeegreso_mod_imp
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::arrange(amelia_fit_imputations_imp1_row) %>%
dplyr::ungroup() %>%
dplyr::filter(amelia_fit_imputations_imp1_row %in% unlist(motivo_de_egreso_a_imputar$row)) %>%
#FILTRAR CASOS QUE SON ILÓGICOS: MUERTES CON TRATAMIENTOS POSTERIORES (1)
dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,motivodeegreso_mod_imp, fech_egres_imp,dup, duplicates_filtered,evaluacindelprocesoteraputico,fech_ing_next_treat),by=c("amelia_fit_imputations_imp1_row"="row")) %>%
dplyr::mutate(value_death=dplyr::case_when(value=="Death"& !is.na(fech_ing_next_treat)~1,T~0)) %>%
dplyr::filter(value_death!=1) %>%
#FILTRAR CASOS QUE SON ILÓGICOS: NO PUEDEN HABER TRATAMIENTOS EN CURSO CON TRATAMIENTOS POSTERIORES (2)
dplyr::mutate(value_fail=dplyr::case_when(value=="Ongoing treatment"& !is.na(fech_ing_next_treat)~1,T~0)) %>%
dplyr::filter(value_fail!=1) %>%
#FILTRAR CASOS QUE SON ILÓGICOS: NO PUEDE HABER OTRA COSA QUE TRATAMIENTO EN CURSO CON FECHA DE CENSURA
dplyr::mutate(value_ong=dplyr::case_when(value!="Ongoing treatment" & fech_egres_imp=="2019-11-13"~1,T~0)) %>%
dplyr::filter(value_ong!=1) %>%
#:#:#:#:#:
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::summarise(adm_dis=sum(value == "Administrative discharge",na.rm=T),
death=sum(value == "Death",na.rm=T),
referral=sum(value == "Referral to another treatment",na.rm=T),
ther_dis=sum(value == "Therapeutic discharge",na.rm=T),
on_treat=sum(value == "Ongoing treatment",na.rm=T),
dropout=sum(value =="Drop-out",na.rm=T)) %>%
melt(id.vars="amelia_fit_imputations_imp1_row") %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::slice_max(value) %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(n=n()) %>%
dplyr::mutate(emp=dplyr::case_when(variable=="adm_dis" & n>1~1,T~0)) %>%
dplyr::filter(emp!=1) %>%
dplyr::mutate(motivodeegreso_mod_imp_imputation=
dplyr::case_when(variable=="adm_dis"~"Administrative discharge",
variable=="death"~"Death",
variable=="ther_dis"~"Therapeutic discharge",
variable=="on_treat"~"Ongoing treatment",
variable=="referral"~"Referral to another treatment",
variable=="dropout"~"Drop-out"))
## `summarise()` ungrouping output (override with `.groups` argument)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:
CONS_C1_df_dup_SEP_2020_women_miss6<-
CONS_C1_df_dup_SEP_2020_women_miss5 %>%
dplyr::left_join(motivodeegreso_mod_imp_imputed[,c("amelia_fit_imputations_imp1_row","motivodeegreso_mod_imp_imputation")], by=c("row"="amelia_fit_imputations_imp1_row")) %>%
#dplyr::filter(is.na(motivodeegreso_mod_imp)) %>% dplyr::select(row,hash_key,motivodeegreso_mod_imp_original, motivodeegreso_mod_imp_imputation,motivodeegreso_mod_imp,fech_egres_num,fech_egres_imp)
dplyr::left_join(cbind.data.frame(motivo_de_egreso_a_imputar,value_to_impute=1),"row") %>%
dplyr::mutate(motivodeegreso_mod_imp=factor(
dplyr::case_when(is.na(motivodeegreso_mod_imp) & value_to_impute==1~motivodeegreso_mod_imp_imputation,
T~as.character(motivodeegreso_mod_imp)))) %>%
dplyr::select(-motivodeegreso_mod_imp_imputation,-value_to_impute) %>%
data.table()
#CONS_C1_df_dup_SEP_2020_women_miss9 %>% janitor::tabyl(motivodeegreso_mod_imp,motivodeegreso_mod_imp_original)
#CONS_C1_df_dup_SEP_2020_women_miss9 %>% janitor::tabyl(motivodeegreso_mod_imp_original)
CONS_C1_df_dup_SEP_2020_women_miss6 %>% janitor::tabyl(motivodeegreso_mod_imp) %>%
dplyr::mutate(percent=scales::percent(percent)) %>%
knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
caption = paste0("Table 2. Imputed Cause of Discharge vs. Original Cause of Discharge"),
#col.names = c("Cause of Discharge","1-High Achievement", "2- Medium Achievement","3- Minimum Achievement","Null Values"),
align =rep('c', 101)) %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 12) %>%
kableExtra::add_footnote("Note. NA= Null values", notation="none") %>%
kableExtra::scroll_box(width = "100%", height = "375px")
Table 2. Imputed Cause of Discharge vs. Original Cause of Discharge
|
motivodeegreso_mod_imp
|
n
|
percent
|
|
Administrative discharge
|
1,739
|
8.13%
|
|
Early Drop-out
|
3,160
|
14.78%
|
|
Late Drop-out
|
7,097
|
33.20%
|
|
Ongoing treatment
|
1,599
|
7.48%
|
|
Referral to another treatment
|
2,669
|
12.48%
|
|
Therapeutic discharge
|
5,114
|
23.92%
|
|
Note. NA= Null values
|
#
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss6)-nrow(CONS_C1_df_dup_SEP_2020_women_miss5)>0){
warning("AGS: Some rows were added in the imputation")}
As a result of the imputations, there were no missing values.
Biopsychosocial involvement
Another variable that is worth imputing is the Biopsychosocial involvement (n= 370). In case of ties, we selected the imputed values with the value with the minimum involvement. In case of ties, we chose the most vulnerable value.
# Ver distintos valores propuestos para sustancia de inciio
#No se ve un patrón de dependencia entre el compromiso biopsicosocial y el estatus de egreso
# table(CONS_C1_df_dup_SEP_2020_women_miss$compromiso_biopsicosocial,
# CONS_C1_df_dup_SEP_2020_women_miss$motivodeegreso_mod_imp)
comp_biopsisoc_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$compromiso_biopsicosocial,
amelia_fit$imputations$imp2$compromiso_biopsicosocial,
amelia_fit$imputations$imp3$compromiso_biopsicosocial,
amelia_fit$imputations$imp4$compromiso_biopsicosocial,
amelia_fit$imputations$imp5$compromiso_biopsicosocial,
amelia_fit$imputations$imp6$compromiso_biopsicosocial,
amelia_fit$imputations$imp7$compromiso_biopsicosocial,
amelia_fit$imputations$imp8$compromiso_biopsicosocial,
amelia_fit$imputations$imp9$compromiso_biopsicosocial,
amelia_fit$imputations$imp10$compromiso_biopsicosocial,
amelia_fit$imputations$imp11$compromiso_biopsicosocial,
amelia_fit$imputations$imp12$compromiso_biopsicosocial,
amelia_fit$imputations$imp13$compromiso_biopsicosocial,
amelia_fit$imputations$imp14$compromiso_biopsicosocial,
amelia_fit$imputations$imp15$compromiso_biopsicosocial,
amelia_fit$imputations$imp16$compromiso_biopsicosocial,
amelia_fit$imputations$imp17$compromiso_biopsicosocial,
amelia_fit$imputations$imp18$compromiso_biopsicosocial,
amelia_fit$imputations$imp19$compromiso_biopsicosocial,
amelia_fit$imputations$imp20$compromiso_biopsicosocial,
amelia_fit$imputations$imp21$compromiso_biopsicosocial,
amelia_fit$imputations$imp22$compromiso_biopsicosocial,
amelia_fit$imputations$imp23$compromiso_biopsicosocial,
amelia_fit$imputations$imp24$compromiso_biopsicosocial,
amelia_fit$imputations$imp25$compromiso_biopsicosocial,
amelia_fit$imputations$imp26$compromiso_biopsicosocial,
amelia_fit$imputations$imp27$compromiso_biopsicosocial,
amelia_fit$imputations$imp28$compromiso_biopsicosocial,
amelia_fit$imputations$imp29$compromiso_biopsicosocial,
amelia_fit$imputations$imp30$compromiso_biopsicosocial,
amelia_fit$imputations$imp31$compromiso_biopsicosocial,
amelia_fit$imputations$imp32$compromiso_biopsicosocial,
amelia_fit$imputations$imp33$compromiso_biopsicosocial,
amelia_fit$imputations$imp34$compromiso_biopsicosocial,
amelia_fit$imputations$imp35$compromiso_biopsicosocial,
amelia_fit$imputations$imp36$compromiso_biopsicosocial,
amelia_fit$imputations$imp37$compromiso_biopsicosocial,
amelia_fit$imputations$imp38$compromiso_biopsicosocial,
amelia_fit$imputations$imp39$compromiso_biopsicosocial,
amelia_fit$imputations$imp40$compromiso_biopsicosocial,
amelia_fit$imputations$imp41$compromiso_biopsicosocial,
amelia_fit$imputations$imp42$compromiso_biopsicosocial,
amelia_fit$imputations$imp43$compromiso_biopsicosocial,
amelia_fit$imputations$imp44$compromiso_biopsicosocial,
amelia_fit$imputations$imp45$compromiso_biopsicosocial,
amelia_fit$imputations$imp46$compromiso_biopsicosocial,
amelia_fit$imputations$imp47$compromiso_biopsicosocial,
amelia_fit$imputations$imp48$compromiso_biopsicosocial,
amelia_fit$imputations$imp49$compromiso_biopsicosocial,
amelia_fit$imputations$imp50$compromiso_biopsicosocial,
amelia_fit$imputations$imp51$compromiso_biopsicosocial,
amelia_fit$imputations$imp52$compromiso_biopsicosocial,
amelia_fit$imputations$imp53$compromiso_biopsicosocial,
amelia_fit$imputations$imp54$compromiso_biopsicosocial,
amelia_fit$imputations$imp55$compromiso_biopsicosocial,
amelia_fit$imputations$imp56$compromiso_biopsicosocial,
amelia_fit$imputations$imp57$compromiso_biopsicosocial,
amelia_fit$imputations$imp58$compromiso_biopsicosocial,
amelia_fit$imputations$imp59$compromiso_biopsicosocial,
amelia_fit$imputations$imp60$compromiso_biopsicosocial,
amelia_fit$imputations$imp61$compromiso_biopsicosocial
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::arrange(amelia_fit_imputations_imp1_row) %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
# 1-Mild 2-Moderate 3-Severe
dplyr::summarise(severe_3=sum(value == "3-Severe",na.rm=T),
mod_2=sum(value == "2-Moderate",na.rm=T),
mild_1=sum(value =="1-Mild",na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::mutate(comp_biopsisoc_imp= dplyr::case_when(
(severe_3>mild_1) & (severe_3>mod_2)~"3-Severe",
(mod_2>mild_1) & (mod_2>severe_3)~"2-Moderate",
(mild_1>mod_2) & (mild_1>severe_3)~"1-Mild"
)) %>%
#2) Resolve ties
dplyr::mutate(ties= dplyr::case_when(is.na(comp_biopsisoc_imp)~1,T~0)) %>%
dplyr::mutate(comp_biopsisoc_imp= dplyr::case_when(ties==1 & ((severe_3>mod_2)|(severe_3>mild_1))~"3-Severe",
ties==1 & ((mod_2>mild_1)|(mod_2>severe_3))~"2-Moderate",
T~comp_biopsisoc_imp))
## `summarise()` ungrouping output (override with `.groups` argument)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
##
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(motivodeegreso_mod_imp,evaluacindelprocesoteraputico)
CONS_C1_df_dup_SEP_2020_women_miss7<-
CONS_C1_df_dup_SEP_2020_women_miss6 %>%
dplyr::left_join(comp_biopsisoc_imputed[,c("amelia_fit_imputations_imp1_row","comp_biopsisoc_imp")], by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(compromiso_biopsicosocial=factor(dplyr::case_when(is.na(compromiso_biopsicosocial) ~comp_biopsisoc_imp,
TRUE~as.character(compromiso_biopsicosocial)))) %>%
dplyr::mutate(compromiso_biopsicosocial=parse_factor(as.character(compromiso_biopsicosocial),levels=c('1-Mild', '2-Moderate','3-Severe'), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "UTF-8"))) %>%
dplyr::select(-comp_biopsisoc_imp) %>%
data.table()
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss7)-nrow(CONS_C1_df_dup_SEP_2020_women_miss6)>0){
warning("AGS: Some rows were added in the imputation")}
As a result of the imputations, there were no missing values.
Tenure status of households
Another variable that is worth imputing is the Tenure status of households (n= 942). In case of ties, we selected the imputed values with the value with the minimum involvement. In case of ties, we kept what we thought was the most vulnerable value (discarding “Owner” or “Renting” values).
tenencia_de_la_vivienda_mod_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp2$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp3$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp4$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp5$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp6$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp7$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp8$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp9$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp10$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp11$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp12$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp13$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp14$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp15$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp16$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp17$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp18$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp19$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp20$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp21$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp22$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp23$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp24$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp25$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp26$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp27$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp28$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp29$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp30$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp31$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp32$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp33$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp34$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp35$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp36$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp37$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp38$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp39$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp40$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp41$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp42$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp43$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp44$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp45$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp46$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp47$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp48$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp49$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp50$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp51$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp52$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp53$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp54$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp55$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp56$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp57$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp58$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp59$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp60$tenencia_de_la_vivienda_mod,
amelia_fit$imputations$imp61$tenencia_de_la_vivienda_mod
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row, value) %>%
tally() %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::top_n(1,n) %>%
dplyr::ungroup()
#tenencia_de_la_vivienda_mod_imputed %>%
# pivot_wider(id_cols="amelia_fit_imputations_imp1_row",names_from="value", values_from="n", values_fill=0) %>%
# dplyr::ungroup()
tenencia_de_la_vivienda_mod_imputed_dup<-
tenencia_de_la_vivienda_mod_imputed %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(num=n()) %>%
dplyr::filter(num>1) %>%
dplyr::ungroup() %>%
#1) owner, discard if it is in the maximum
dplyr::mutate(n=dplyr::case_when(value=="Owner/Transferred dwellings/Pays Dividends"~0,T~as.numeric(n))) %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::top_n(1,n) %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
#2) Renting vs. stays temporarily with a relative, keep the second
dplyr::mutate(n=dplyr::case_when(value=="Renting"~0,T~as.numeric(n))) %>%
dplyr::top_n(1,n) %>%
dplyr::ungroup() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(n_dup=n())
tenencia_de_la_vivienda_mod_imputed_final<-
tenencia_de_la_vivienda_mod_imputed %>%
dplyr::left_join(tenencia_de_la_vivienda_mod_imputed_dup, by=c("amelia_fit_imputations_imp1_row", "value")) %>%
#si es vacío, y no está en la base, es valor 0 (es difícil que)
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::mutate(sum= suppressWarnings(max(num, na.rm=T))) %>%
dplyr::ungroup() %>%
#descarto los que presentaron más de un valor para una misma fila y aquellos que no fueron seleccionados
dplyr::mutate(descartar=dplyr::case_when(sum>1 & is.na(n.y)~1,T~0)) %>%
dplyr::filter(descartar==0)
ifelse(nrow(tenencia_de_la_vivienda_mod_imputed_final)/length(unique(CONS_C1_df_dup_SEP_2020_women_miss7$row))>1,
"There are still more than one value in the imputation","")
## [1] ""
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(motivodeegreso_mod_imp,evaluacindelprocesoteraputico)
CONS_C1_df_dup_SEP_2020_women_miss8<-
CONS_C1_df_dup_SEP_2020_women_miss7 %>%
dplyr::left_join(tenencia_de_la_vivienda_mod_imputed_final[,c("amelia_fit_imputations_imp1_row","value")], by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(tenencia_de_la_vivienda_mod=factor(dplyr::case_when(is.na(tenencia_de_la_vivienda_mod) ~value,
TRUE~as.character(tenencia_de_la_vivienda_mod)))) %>%
dplyr::select(-value) %>%
data.table()
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss8)-nrow(CONS_C1_df_dup_SEP_2020_women_miss7)>0){
warning("AGS: Some rows were added in the imputation")}
As a result of the imputations, there were no missing values.
Number of children (max. Value) (Dichotomized)
A numeric variable that had a great proportion of missing values was this (n= 82).
As seen in the figure above, most of the imputations were around 1 and 3 children, leaving less space for an imputation of no children or more than 3. We imputed these values, by approximating the mean of the 61 candidate values to a discrete number.
numero_de_hijos_mod_rec_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp2$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp3$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp4$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp5$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp6$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp7$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp8$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp9$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp10$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp11$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp12$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp13$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp14$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp15$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp16$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp17$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp18$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp19$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp20$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp21$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp22$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp23$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp24$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp25$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp26$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp27$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp28$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp29$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp30$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp31$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp32$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp33$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp34$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp35$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp36$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp37$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp38$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp39$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp40$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp41$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp42$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp43$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp44$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp45$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp46$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp47$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp48$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp49$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp50$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp51$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp52$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp53$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp54$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp55$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp56$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp57$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp58$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp59$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp60$numero_de_hijos_mod_rec,
amelia_fit$imputations$imp61$numero_de_hijos_mod_rec
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::summarise(children= sum(value=="Yes"),
no_children= sum(value=="No")) %>%
dplyr::mutate(numero_de_hijos_mod_rec_imp=dplyr::case_when(children>=31~"Yes",
no_children>=31~"No"))
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_women_miss9<-
CONS_C1_df_dup_SEP_2020_women_miss8 %>%
dplyr::left_join(dplyr::select(numero_de_hijos_mod_rec_imputed,amelia_fit_imputations_imp1_row,numero_de_hijos_mod_rec_imp), by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(numero_de_hijos_mod_rec=factor(dplyr::case_when(is.na(numero_de_hijos_mod_rec)~as.character(numero_de_hijos_mod_rec_imp),T~as.character(numero_de_hijos_mod_rec)))) %>%
dplyr::select(-numero_de_hijos_mod_rec_imp) %>%
data.table()
#table(is.na(CONS_C1_df_dup_SEP_2020_women_miss12$numero_de_hijos_mod_rec))
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss9)-nrow(CONS_C1_df_dup_SEP_2020_women_miss8)>0){
warning("AGS: Some rows were added in the imputation")}
As a result of the imputations, there were no missing values.
Type of Program
A numeric variable that was important to impute missing values was the type of program (n= 17).
tipo_de_programa_2_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$tipo_de_programa_2,
amelia_fit$imputations$imp2$tipo_de_programa_2,
amelia_fit$imputations$imp3$tipo_de_programa_2,
amelia_fit$imputations$imp4$tipo_de_programa_2,
amelia_fit$imputations$imp5$tipo_de_programa_2,
amelia_fit$imputations$imp6$tipo_de_programa_2,
amelia_fit$imputations$imp7$tipo_de_programa_2,
amelia_fit$imputations$imp8$tipo_de_programa_2,
amelia_fit$imputations$imp9$tipo_de_programa_2,
amelia_fit$imputations$imp10$tipo_de_programa_2,
amelia_fit$imputations$imp11$tipo_de_programa_2,
amelia_fit$imputations$imp12$tipo_de_programa_2,
amelia_fit$imputations$imp13$tipo_de_programa_2,
amelia_fit$imputations$imp14$tipo_de_programa_2,
amelia_fit$imputations$imp15$tipo_de_programa_2,
amelia_fit$imputations$imp16$tipo_de_programa_2,
amelia_fit$imputations$imp17$tipo_de_programa_2,
amelia_fit$imputations$imp18$tipo_de_programa_2,
amelia_fit$imputations$imp19$tipo_de_programa_2,
amelia_fit$imputations$imp20$tipo_de_programa_2,
amelia_fit$imputations$imp21$tipo_de_programa_2,
amelia_fit$imputations$imp22$tipo_de_programa_2,
amelia_fit$imputations$imp23$tipo_de_programa_2,
amelia_fit$imputations$imp24$tipo_de_programa_2,
amelia_fit$imputations$imp25$tipo_de_programa_2,
amelia_fit$imputations$imp26$tipo_de_programa_2,
amelia_fit$imputations$imp27$tipo_de_programa_2,
amelia_fit$imputations$imp28$tipo_de_programa_2,
amelia_fit$imputations$imp29$tipo_de_programa_2,
amelia_fit$imputations$imp30$tipo_de_programa_2,
amelia_fit$imputations$imp31$tipo_de_programa_2,
amelia_fit$imputations$imp32$tipo_de_programa_2,
amelia_fit$imputations$imp33$tipo_de_programa_2,
amelia_fit$imputations$imp34$tipo_de_programa_2,
amelia_fit$imputations$imp35$tipo_de_programa_2,
amelia_fit$imputations$imp36$tipo_de_programa_2,
amelia_fit$imputations$imp37$tipo_de_programa_2,
amelia_fit$imputations$imp38$tipo_de_programa_2,
amelia_fit$imputations$imp39$tipo_de_programa_2,
amelia_fit$imputations$imp40$tipo_de_programa_2,
amelia_fit$imputations$imp41$tipo_de_programa_2,
amelia_fit$imputations$imp42$tipo_de_programa_2,
amelia_fit$imputations$imp43$tipo_de_programa_2,
amelia_fit$imputations$imp44$tipo_de_programa_2,
amelia_fit$imputations$imp45$tipo_de_programa_2,
amelia_fit$imputations$imp46$tipo_de_programa_2,
amelia_fit$imputations$imp47$tipo_de_programa_2,
amelia_fit$imputations$imp48$tipo_de_programa_2,
amelia_fit$imputations$imp49$tipo_de_programa_2,
amelia_fit$imputations$imp50$tipo_de_programa_2,
amelia_fit$imputations$imp51$tipo_de_programa_2,
amelia_fit$imputations$imp52$tipo_de_programa_2,
amelia_fit$imputations$imp53$tipo_de_programa_2,
amelia_fit$imputations$imp54$tipo_de_programa_2,
amelia_fit$imputations$imp55$tipo_de_programa_2,
amelia_fit$imputations$imp56$tipo_de_programa_2,
amelia_fit$imputations$imp57$tipo_de_programa_2,
amelia_fit$imputations$imp58$tipo_de_programa_2,
amelia_fit$imputations$imp59$tipo_de_programa_2,
amelia_fit$imputations$imp60$tipo_de_programa_2,
amelia_fit$imputations$imp61$tipo_de_programa_2
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::summarise(WE= sum(value=="Women specific"),
GP= sum(value=="General population")) %>%
dplyr::mutate(tipo_de_programa_2_imp=dplyr::case_when(WE>=31~"Women specific",
GP>=31~"General population"))
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_women_miss10<-
CONS_C1_df_dup_SEP_2020_women_miss9 %>%
dplyr::left_join(dplyr::select(tipo_de_programa_2_imputed,amelia_fit_imputations_imp1_row,tipo_de_programa_2_imp), by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(tipo_de_programa_2=factor(dplyr::case_when(is.na(tipo_de_programa_2)~as.character(tipo_de_programa_2_imp),T~as.character(tipo_de_programa_2)))) %>%
dplyr::select(-tipo_de_programa_2_imp) %>%
data.table()
#table(is.na(CONS_C1_df_dup_SEP_2020_women_miss12$tipo_de_programa_2))
if(nrow(CONS_C1_df_dup_SEP_2020_women_miss10)-nrow(CONS_C1_df_dup_SEP_2020_women_miss9)>0){
warning("AGS: Some rows were added in the imputation")}
As a result of the imputations, there were no missing values.
Type of Plan
We looked over possible imputations to the type of plan (n=17).
tipo_de_plan_res_imputed<-
cbind.data.frame(amelia_fit$imputations$imp1$row,
amelia_fit$imputations$imp1$tipo_de_plan_res,
amelia_fit$imputations$imp2$tipo_de_plan_res,
amelia_fit$imputations$imp3$tipo_de_plan_res,
amelia_fit$imputations$imp4$tipo_de_plan_res,
amelia_fit$imputations$imp5$tipo_de_plan_res,
amelia_fit$imputations$imp6$tipo_de_plan_res,
amelia_fit$imputations$imp7$tipo_de_plan_res,
amelia_fit$imputations$imp8$tipo_de_plan_res,
amelia_fit$imputations$imp9$tipo_de_plan_res,
amelia_fit$imputations$imp10$tipo_de_plan_res,
amelia_fit$imputations$imp11$tipo_de_plan_res,
amelia_fit$imputations$imp12$tipo_de_plan_res,
amelia_fit$imputations$imp13$tipo_de_plan_res,
amelia_fit$imputations$imp14$tipo_de_plan_res,
amelia_fit$imputations$imp15$tipo_de_plan_res,
amelia_fit$imputations$imp16$tipo_de_plan_res,
amelia_fit$imputations$imp17$tipo_de_plan_res,
amelia_fit$imputations$imp18$tipo_de_plan_res,
amelia_fit$imputations$imp19$tipo_de_plan_res,
amelia_fit$imputations$imp20$tipo_de_plan_res,
amelia_fit$imputations$imp21$tipo_de_plan_res,
amelia_fit$imputations$imp22$tipo_de_plan_res,
amelia_fit$imputations$imp23$tipo_de_plan_res,
amelia_fit$imputations$imp24$tipo_de_plan_res,
amelia_fit$imputations$imp25$tipo_de_plan_res,
amelia_fit$imputations$imp26$tipo_de_plan_res,
amelia_fit$imputations$imp27$tipo_de_plan_res,
amelia_fit$imputations$imp28$tipo_de_plan_res,
amelia_fit$imputations$imp29$tipo_de_plan_res,
amelia_fit$imputations$imp30$tipo_de_plan_res,
amelia_fit$imputations$imp31$tipo_de_plan_res,
amelia_fit$imputations$imp32$tipo_de_plan_res,
amelia_fit$imputations$imp33$tipo_de_plan_res,
amelia_fit$imputations$imp34$tipo_de_plan_res,
amelia_fit$imputations$imp35$tipo_de_plan_res,
amelia_fit$imputations$imp36$tipo_de_plan_res,
amelia_fit$imputations$imp37$tipo_de_plan_res,
amelia_fit$imputations$imp38$tipo_de_plan_res,
amelia_fit$imputations$imp39$tipo_de_plan_res,
amelia_fit$imputations$imp40$tipo_de_plan_res,
amelia_fit$imputations$imp41$tipo_de_plan_res,
amelia_fit$imputations$imp42$tipo_de_plan_res,
amelia_fit$imputations$imp43$tipo_de_plan_res,
amelia_fit$imputations$imp44$tipo_de_plan_res,
amelia_fit$imputations$imp45$tipo_de_plan_res,
amelia_fit$imputations$imp46$tipo_de_plan_res,
amelia_fit$imputations$imp47$tipo_de_plan_res,
amelia_fit$imputations$imp48$tipo_de_plan_res,
amelia_fit$imputations$imp49$tipo_de_plan_res,
amelia_fit$imputations$imp50$tipo_de_plan_res,
amelia_fit$imputations$imp51$tipo_de_plan_res,
amelia_fit$imputations$imp52$tipo_de_plan_res,
amelia_fit$imputations$imp53$tipo_de_plan_res,
amelia_fit$imputations$imp54$tipo_de_plan_res,
amelia_fit$imputations$imp55$tipo_de_plan_res,
amelia_fit$imputations$imp56$tipo_de_plan_res,
amelia_fit$imputations$imp57$tipo_de_plan_res,
amelia_fit$imputations$imp58$tipo_de_plan_res,
amelia_fit$imputations$imp59$tipo_de_plan_res,
amelia_fit$imputations$imp60$tipo_de_plan_res,
amelia_fit$imputations$imp61$tipo_de_plan_res
) %>%
melt(id.vars="amelia_fit$imputations$imp1$row") %>%
janitor::clean_names() %>%
dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
dplyr::summarise(outpatient= sum(value=="Outpatient"),
residential= sum(value=="Residential")) %>%
dplyr::mutate(tipo_de_plan_res_imp=dplyr::case_when(outpatient>=31~"Outpatient",
residential>=31~"Residential"))
## `summarise()` ungrouping output (override with `.groups` argument)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_women_miss11<-
CONS_C1_df_dup_SEP_2020_women_miss10 %>%
dplyr::left_join(dplyr::select(tipo_de_plan_res_imputed,amelia_fit_imputations_imp1_row,tipo_de_plan_res_imp), by=c("row"="amelia_fit_imputations_imp1_row")) %>%
dplyr::mutate(tipo_de_plan_res=factor(dplyr::case_when(is.na(tipo_de_plan_res)~as.character(tipo_de_plan_res_imp),T~as.character(tipo_de_plan_res)))) %>%
dplyr::select(-tipo_de_plan_res_imp) %>%
data.table()
#table(is.na(CONS_C1_df_dup_SEP_2020_women_miss11$tipo_centro_pub))
#table(is.na(CONS_C1_df_dup_SEP_2020_women_miss11$nombre_region))
As a result, there were no missing values once imputed.
Survival analysis
Incidence rate
To describe the incidence rate of SUD treatment readmissions by type of health conditions, we recoded the different variables of interest into pairs of different groups by each variable.
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr<-
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados %>%
dplyr::left_join(CONS_C1_df_dup_SEP_2020[,c("row","person_years")], by="row") %>%
dplyr::mutate(readmission= factor(ifelse(!is.na(fech_ing_next_treat),1,0))) %>%
dplyr::mutate(day_to_readmission= dplyr::case_when(
readmission==1~ (fech_ing_next_treat-fech_ing_num)/365.25,#,
readmission==0~ (as.numeric(as.Date("2019-11-13"))-fech_ing_num)/365.25)) %>%
dplyr::mutate(comp_status=factor(dplyr::case_when(motivodeegreso_mod_imp=="Therapeutic discharge"~1,grepl("Drop",motivodeegreso_mod_imp)~2,grepl("Administrative",motivodeegreso_mod_imp)~2,T~0),levels = c(0,1,2),labels = c("Censored","Therapeutic discharge","Discharge without clinical advice"))) %>%
dplyr::mutate(time_to_outcome= dplyr::case_when(
grepl("Censored",comp_status,ignore.case= T)~(as.numeric(as.Date("2019-11-13"))-fech_ing_num)/365.25,
grepl("Discharge",comp_status,ignore.case= T)~(fech_egres_num-fech_ing_num)/365.25)) %>%
dplyr::mutate(outcome_to_readmission= dplyr::case_when(
readmission==1~ (fech_ing_next_treat-fech_egres_num)/365.25,# & grepl("",comp_status)
readmission==0~ (as.numeric(as.Date("2019-11-13"))-fech_egres_num)/365.25))
library(survminer)
tipo_de_programa_2_fit<- survfit(Surv(day_to_readmission, readmission==1) ~ strata(tipo_de_programa_2),
data=CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr,
type = "kaplan-meier",
error = "greenwood",
conf.type = "log-log")
survdiff_tipo_de_programa_2<-survdiff(Surv(day_to_readmission, readmission==1) ~ tipo_de_programa_2,
data=CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr)
library(tidyverse)
library(lubridate)
library(ggfortify)
tipo_de_programa_2_na <- tipo_de_programa_2_fit %>% fortify %>% group_by(strata) %>% mutate(CumHaz = cumsum(n.event/n.risk))
#%>% mutate(time=time*365.25)
#If one of the groups has not yet dropped to 50% survival at the end of the available data, you cannot compute a median survival and there will be NA values for median survival in such cases. Even if median survival has been reached in a group, it might not be possible to calculate complete confidence intervals for those median values, as you have seen.
#for which <50% of records have an event. Without further assumptions (e.g. that some specific parametric distribution applies) it is then not possible to give a point estimate for the median time to event (the time when half the records have an event).
#http://rstudio-pubs-static.s3.amazonaws.com/522481_5e55bec9c94044678e680a6d07e96a2e.html
#https://rstudio-pubs-static.s3.amazonaws.com/258589_cd197f86fb5548ac89d7bcffd4bc6afe.html
#http://pcool.dyndns.org:8080/statsbook/?page_id=513
#http://rstudio-pubs-static.s3.amazonaws.com/316989_83cbe556125645b698c9ff6cf88c4c1a.html
#https://cran.r-project.org/web/packages/survminer/readme/README.html
#https://docs.ufpr.br/~jlpadilha/CE077/Aulas/2.TecnicasNaoParametricas.pdf
#http://www.columbia.edu/~sjm2186/EPIC_R/packages.pdf
if(no_mostrar==1){
tipo_de_programa_2_na %>%
ggplot(aes(time,CumHaz,color=strata,fill=strata))+
geom_line()+
geom_ribbon(aes(ymin = CumHaz - (1.96*std.err), ymax = CumHaz + (1.96*std.err)),alpha=.4)+
sjPlot::theme_sjplot2()+
scale_color_manual(name="Type of Program", values=c("#E69F00", "#56B4E9"))+
scale_fill_manual(name="Type of Program", values=c("#E69F00", "#56B4E9"))
}
ggsurvplot_tipo_de_programa_2_fit<-
ggsurvplot(tipo_de_programa_2_fit,
fun = "cumhaz",
conf.int = TRUE,
legend.labs = c("General population", "Women specific"),
risk.table = "abs_pct",
#ncensor.plot = TRUE,
ggtheme = theme_classic2(base_size=10),
#ylim=c(0,1),
legend = c(0.88, 0.15),
legend.title="Type of Program",
xlab= "Time (in years)",
cumevents = TRUE,
surv.connect = T,
tables.theme = theme_cleantable(),
censor= F,
tables.height = 0.15,
risk.table.y.text.col = F,
risk.table.col="black",
font.tickslab = c(10),
risk.table.height = .15,
risk.table.fontsize = 2.5,
cumevents.y.text.col = F,
cumevents.col="black",
cumevents.height = .12,
cumevents.fontsize = 2,
break.time.by = 1,
pval = F,
#xscale= "d_y", #scale days to years
palette = c("skyblue4","orangered4"))
ggsurvplot_tipo_de_programa_2_fit
# scale_y_continuous(breaks = sort(c(seq(0, 100, 10), 56)))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
irrs<-function(x, y="event", z="person_days",db){
#x= variable que agrupa
#y= evento explicado
#z= person days
#db= base de datos
fmla <- as.formula(paste0(y,"~",x))
fmla2 <- as.formula(paste0(z,"~",x))
assign(paste0("irr_",y,"_por_",x),
rateratio.test::rateratio.test(
x=as.numeric(xtabs(fmla, data=get(db)))[c(2,1)],
n=as.numeric(xtabs(fmla, data=get(db)))[c(2,1)]
)
)
return(
rateratio.test::rateratio.test(
x=as.numeric(xtabs(fmla, data=get(db)))[c(2,1)],
n=as.numeric(xtabs(fmla2, data=get(db)))[c(2,1)]
)
)
}
irrs_tipo_de_programa_2<-irrs(x="tipo_de_programa_2", z="day_to_readmission", y="as.numeric(readmission)", db="CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr")
#General population=1 Women specific=2
if(no_mostrar==1){
jpeg("C:/Users/andre/Desktop/SUD_CL/eso.jpg", height=10, width= 10, res= 96, units = "in")
ggsurvplot_tipo_de_programa_2_fit
dev.off()
}
The incidence rate of readmission was 1.22 (95% IC 1.19-1.25) higher in users that were admitted in women specific programs, compared with users that were admitted in general-population programs (p<0.001).
library(survminer)
comp_status_fit<- survfit(Surv(outcome_to_readmission, readmission==1) ~strata(comp_status),
data= CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr,
type = "kaplan-meier",
error = "greenwood",
conf.type = "log-log")
library(tidyverse)
library(lubridate)
library(ggfortify)
comp_status_fit_na <- comp_status_fit %>% fortify %>% group_by(strata) %>% mutate(CumHaz = cumsum(n.event/n.risk))
ggsurvplot_comp_status_fit<-
ggsurvplot(comp_status_fit,
fun = "cumhaz",
conf.int = TRUE,
legend.labs = c("Censored","Therapeutic discharge", "Discharge w/o clinical advice"),
risk.table = "abs_pct",
#ncensor.plot = TRUE,
ggtheme = theme_classic2(base_size=10),
risk.table.y.text.col = F,
risk.table.col="black",
font.tickslab = c(10),
risk.table.height = .15,
risk.table.fontsize = 2.5,
break.time.by = 1,
pval = F,
cumevents = TRUE,
tables.theme = theme_cleantable(),
cumevents.y.text.col = F,
cumevents.col="black",
cumevents.height = .11,
cumevents.fontsize = .5,
#ylim=c(0,10),
legend = c(0.88, 0.15),
legend.title="Type of Program",
xlab= "Time (in years)",
#cumevents=T,
surv.connect = T,
censor= F,
#xscale= "d_y",
palette = c("skyblue4","darkgreen","orangered4"))
ggsurvplot_comp_status_fit
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#table(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$comp_status,
# CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$motivodeegreso_mod_imp)
irrs<-function(x, y="event", z="person_days",db){
#x= variable que agrupa
#y= evento explicado
#z= person days
#db= base de datos
fmla <- as.formula(paste0(y,"~",x))
fmla2 <- as.formula(paste0(z,"~",x))
assign(paste0("irr_",y,"_por_",x),
rateratio.test::rateratio.test(
x=as.numeric(xtabs(fmla, data=get(db)))[c(2,1)],
n=as.numeric(xtabs(fmla, data=get(db)))[c(2,1)]
)
)
return(
rateratio.test::rateratio.test(
x=as.numeric(xtabs(fmla, data=get(db)))[c(2,1)],
n=as.numeric(xtabs(fmla2, data=get(db)))[c(2,1)]
)
)
}
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_cmprsk<-
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr[which(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$comp_status!='Censored')]
comp_status_readmission <-
irrs(x="as.numeric(comp_status)",
y="as.numeric(readmission)",
z="outcome_to_readmission",
db="CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_cmprsk")
time_bef_inc_rate<-Sys.time()
Excluding censored cases, the incidence rate of readmission was not significantly higher (0.99, 95% IC 0.96-1.02) in users that were admitted in women specific programs, compared with users that were admitted in general-population programs (p= 0.387).
Parametric regression, Interaction of Type of Program and Treatment Outcome on Readmission
To estimate the risk and the time to treatment readmission by type of treatment program (i.e, women-only and mixed-gender treatment programs) conditioned by previous treatment outcome (i.e., administrative discharge, early and late drop-outs, therapeutic discharge), we chose the parametric distribution that resembles most to the Kaplan-meier survival curve among the intercept-only models.
#Revisión de los casos
#print(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr[,c("comp_status", "outcome_to_readmission", "readmission")],10)
#_#_#_#_#_#_#_#_#_#_#_
fitted_flexsurvreg0<-data.frame()
fit_flexsurvreg0<-data.frame()
dists_simple_surv <- cbind.data.frame(
formal=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential"),1),
model=rep(c("weibull", "weibullph", "llogis", "gamma", "gengamma", "gompertz", "lnorm", "exp"),1))
fitform0a <- Surv(outcome_to_readmission, readmission==1) ~ 1
fitform0b <- Surv(outcome_to_readmission, readmission==1) ~ edad_al_ing_grupos+ escolaridad_rec+ sus_principal_mod+
freq_cons_sus_prin+ compromiso_biopsicosocial+ tenencia_de_la_vivienda_mod+
num_otras_sus_mod+ numero_de_hijos_mod_rec+ tipo_de_plan_res+ tipo_de_programa_2*comp_status
#_#_#_#_#_#_#_#_#_#_#
#_#_#_#_#_#_#_#_#_#_#
for (i in 1:nrow(dists_simple_surv)){
model<-paste0("mod_surv_simple_",dists_simple_surv[i,"model"])
flexsurvreg(formula=fitform0a,
# si habían tiempos 0, los cambié por .000001 para que no hayan casos absolutamente continuos
# saqué los casos censurados
data = subset(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr, comp_status != "Censored") %>% dplyr::mutate(outcome_to_readmission=ifelse(outcome_to_readmission==0,.000001,outcome_to_readmission)),
dist = dists_simple_surv[i,"model"]) %>%
assign(model,.,envir=,.GlobalEnv)
# crear índices de ajuste
fit_flexsurvreg0<-rbind(fit_flexsurvreg0,
cbind(dist= dists_simple_surv[i,"formal"],
AIC= get(model)$AIC,
Llik= get(model)$loglik,
npars= get(model)$npars,
pars= get(model)$AIC/2 + get(model)$loglik,
BIC= get(model)$loglik+ log(get(model)$npars)* (get(model)$AIC/2 + get(model)$loglik)
)
)
}
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
mod_surv_simple_km <-survfit(Surv(outcome_to_readmission, readmission==1)~ 1,
data = subset(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr, comp_status != "Censored",
conf.type = "log-log") %>% dplyr::mutate(outcome_to_readmission=ifelse(outcome_to_readmission==0,.000001,outcome_to_readmission)))
#layout(matrix(1:8, nc = 2, byrow = F))
plot(mod_surv_simple_km, col="red", xlim=c(1,11),ylim=c(.4,1),conf.int = F)
lines(mod_surv_simple_weibull, col="navyblue",ci = F)
lines(mod_surv_simple_weibullph, col="#B89673",ci = F)
lines(mod_surv_simple_llogis, col="#A0A36D",ci = F)
lines(mod_surv_simple_gamma, col="#886894",ci = F)
lines(mod_surv_simple_gengamma, col="darkorchid4",ci = F)
lines(mod_surv_simple_gompertz, col="#496A72",ci = F)
lines(mod_surv_simple_lnorm, col="gray70",ci = F)
lines(mod_surv_simple_exp,col="gray15",ci = F)
legend("bottomleft",
legend = c("Kaplan-Meier","Weibull (AFT)", "Weibull (PH)",
"Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential"), col =
c("red","navyblue","#B89673","#A0A36D","#886894",
"darkorchid4","#496A72","gray70","gray15"),
title = "Distributions", cex = .95, bty = "n", lty=1)# lty = 1:2,
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#goes out for 15 years with cycles of one month
tt15<-seq(0,15,1/12)
newtime = seq(from=1/24, to= 15, by=1/24)
dist_w_best_fit<-
fit_flexsurvreg0%>%arrange(AIC)%>%dplyr::left_join(dists_simple_surv, by=c("dist"="formal"))%>%slice(1)%>%dplyr::select(model)%>%as.character()
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
dt_coefs_simple_surv<-
data.frame(
real_vars=c("tipo_de_programa_2Women specific", "edad_al_ing_grupos.L", "edad_al_ing_grupos.Q", "edad_al_ing_grupos.C","escolaridad_rec.L", "escolaridad_rec.Q", "sus_principal_modCocaine hydrochloride", "sus_principal_modCocaine paste", "sus_principal_modMarijuana", "sus_principal_modOther", "freq_cons_sus_prin.L", "freq_cons_sus_prin.Q", "freq_cons_sus_prin.C", "freq_cons_sus_prin^4", "compromiso_biopsicosocial.L", "compromiso_biopsicosocial.Q", "tenencia_de_la_vivienda_modOthers", "tenencia_de_la_vivienda_modOwner/Transferred dwellings/Pays Dividends", "tenencia_de_la_vivienda_modRenting", "tenencia_de_la_vivienda_modStays temporarily with a relative", "num_otras_sus_mod.L", "num_otras_sus_mod.Q", "numero_de_hijos_mod_recYes", "tipo_de_plan_resResidential", "comp_statusDischarge without clinical advice", "tipo_de_programa_2Women specific:comp_statusDischarge without clinical advice"),
formal_vars= c('Type of program-Women specific', 'Age at admission to treatment, grouped- 30-39', 'Age at admission to treatment, grouped- 40-49', 'Age at admission to treatment, grouped- 50+', 'Ed. Attainment- Completed high school or less', 'Ed. Attainment- More than high school', 'Primary or main substance- Cocaine hydrochloride', 'Primary or main substance- Cocaine paste', 'Primary or main substance- Marijuana', 'Primary or main substance- Other', 'Consumption frequency of primary or main substance- 2 to 3 days a week', 'Consumption frequency of primary or main substance- 4 to 6 days a week', 'Consumption frequency of primary or main substance- 1 day a week or more', 'Consumption frequency of primary or main substance- Daily', 'Biopsychosocial involvement- 2-Moderate', 'Biopsychosocial involvement- 3-Severe', 'Tenure status of households- Others', 'Tenure status of households- Owner/Transferred dwellings/Pays Dividends', 'Tenure status of households- Renting', 'Tenure status of households- Stays temporarily with a relative', 'Co-occurring SUD- One additional substance', 'Co-occurring SUD- More than one additional substance', 'Have children (Dichotomized)- Yes', 'Setting of Treatment- Residential', 'Cause of Discharge- W/o clinical advice', 'Type of Program x Cause of Discharge')#'Educational Attainment', tipo_de_plan_resResidential
)
flexsurv_1<-
flexsurvreg(formula=fitform0b,
# si habían tiempos 0, los cambié por .000001 para que no hayan casos absolutamente continuos
# saqué los casos censurados
data = subset(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr, comp_status != "Censored") %>% dplyr::mutate(outcome_to_readmission=ifelse(outcome_to_readmission==0,.000001,outcome_to_readmission)),
dist = dist_w_best_fit
)
#effect = "distp") # distp for distribution parameters (name can be changed)
#tipo_de_programa_2Women specific:comp_statusDischarge without clinical advice
note_flexsurv_1 <-
paste0("N= ",format(as.numeric(flexsurv_1$N), big.mark=","),"; Events= ",format(as.numeric(flexsurv_1$events), big.mark=","),"; Censored= ",format(as.numeric(flexsurv_1$N - flexsurv_1$events), big.mark=","),"; Time at risk= ",format(as.numeric(flexsurv_1$trisk), big.mark=","),"; Df= ",format(as.numeric(flexsurv_1$npars), big.mark=","), "; AIC= ",format(as.numeric(flexsurv_1$AIC), big.mark=","))
#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:350px; overflow-x: scroll; width:100%">
data.table::data.table(round(exp(flexsurv_1$res),2)[,1:3],keep.rownames = T) %>%
dplyr::left_join(dt_coefs_simple_surv, by=c("rn"="real_vars")) %>%
#dplyr::mutate(p.value=ifelse(p.value<.001,"<0.001",as.character(sprintf("%1.3f",p.value)))) %>%
dplyr::mutate(conf.low=sprintf("%1.2f",`L95%`),
conf.high=sprintf("%1.2f",`U95%`),
statistic=sprintf("%1.2f",est),
`CI 95%`=paste0(conf.low,", ",conf.high)) %>%
dplyr::filter(!rn %in% c("shape","scale")) %>%
dplyr::select(formal_vars, statistic, `CI 95%`) %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption=paste0("Table 5. Coefficients, ",fit_flexsurvreg0%>%arrange(AIC)%>%dplyr::left_join(dists_simple_surv, by=c("dist"="formal"))%>%slice(1) %>% dplyr::select(1)," parametric distribution"),
col.names = c("Term","Hazard","CI 95%"),
align= c("l",rep('c', 6)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size=11)%>%
kableExtra::add_footnote(note_flexsurv_1, notation = "none") %>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 5. Coefficients, Weibull (AFT) parametric distribution
|
Term
|
Hazard
|
CI 95%
|
|
Age at admission to treatment, grouped- 30-39
|
1.68
|
1.40, 2.01
|
|
Age at admission to treatment, grouped- 40-49
|
1.29
|
1.11, 1.49
|
|
Age at admission to treatment, grouped- 50+
|
0.94
|
0.83, 1.07
|
|
Ed. Attainment- Completed high school or less
|
1.36
|
1.18, 1.57
|
|
Ed. Attainment- More than high school
|
1.22
|
1.10, 1.35
|
|
Primary or main substance- Cocaine hydrochloride
|
0.75
|
0.62, 0.90
|
|
Primary or main substance- Cocaine paste
|
0.65
|
0.55, 0.76
|
|
Primary or main substance- Marijuana
|
1.22
|
0.92, 1.62
|
|
Primary or main substance- Other
|
1.82
|
1.23, 2.71
|
|
Consumption frequency of primary or main substance- 2 to 3 days a week
|
0.77
|
0.62, 0.96
|
|
Consumption frequency of primary or main substance- 4 to 6 days a week
|
1.21
|
1.00, 1.48
|
|
Consumption frequency of primary or main substance- 1 day a week or more
|
0.85
|
0.69, 1.04
|
|
Consumption frequency of primary or main substance- Daily
|
1.15
|
0.97, 1.37
|
|
Biopsychosocial involvement- 2-Moderate
|
0.76
|
0.61, 0.93
|
|
Biopsychosocial involvement- 3-Severe
|
1.00
|
0.88, 1.14
|
|
Tenure status of households- Others
|
0.93
|
0.53, 1.63
|
|
Tenure status of households- Owner/Transferred dwellings/Pays Dividends
|
0.90
|
0.57, 1.43
|
|
Tenure status of households- Renting
|
0.94
|
0.59, 1.51
|
|
Tenure status of households- Stays temporarily with a relative
|
0.97
|
0.62, 1.53
|
|
Co-occurring SUD- One additional substance
|
0.78
|
0.70, 0.88
|
|
Co-occurring SUD- More than one additional substance
|
1.02
|
0.93, 1.12
|
|
Have children (Dichotomized)- Yes
|
0.85
|
0.70, 1.03
|
|
Setting of Treatment- Residential
|
0.36
|
0.31, 0.42
|
|
Type of program-Women specific
|
0.62
|
0.48, 0.79
|
|
Cause of Discharge- W/o clinical advice
|
0.54
|
0.45, 0.66
|
|
Type of Program x Cause of Discharge
|
1.49
|
1.14, 1.94
|
|
N= 17,110; Events= 4,659; Censored= 12,451; Time at risk= 59,988.58; Df= 28; AIC= 29,043.31
|
#</div>
#"baseline.real" which transforms the baseline distribution parameters to the real number line used for estimation.
#"coefs.exp" which exponentiates the covariate effects.
library(flexsurv)
newdat <- data.table::data.table(tipo_de_programa_2= factor(c(rep("Women specific",2),rep("General population",2))),
comp_status= factor(rep(c("Therapeutic discharge","Discharge without clinical advice"),2)),
edad_al_ing_grupos= factor(rep("50+",4)),
escolaridad_rec= factor(rep("1-More than high school",4)),
sus_principal_mod= factor(rep("Marijuana",4)),
freq_cons_sus_prin= factor(rep("2 to 3 days a week",4)),
compromiso_biopsicosocial= factor(rep("1-Mild",4)),
tenencia_de_la_vivienda_mod= factor(rep("Owner/Transferred dwellings/Pays Dividends",4)),
num_otras_sus_mod= factor(rep("No additional substance",4)),
numero_de_hijos_mod_rec= factor(rep("No",4)),
tipo_de_plan_res= factor(rep("Outpatient",4))
)
newdat2 <- data.table::data.table(tipo_de_programa_2= factor(c(rep("Women specific",2),rep("General population",2))),#factor(c(rep("Women specific",2),rep("General population",2))),
comp_status= factor(rep(c("Therapeutic discharge","Discharge without clinical advice"),2)),
edad_al_ing_grupos= factor(rep("30-39",4)),
escolaridad_rec= factor(rep("2-Completed high school or less",4)),
sus_principal_mod= factor(rep("Alcohol",4)),
freq_cons_sus_prin= factor(rep("Less than 1 day a week",4)),
compromiso_biopsicosocial= factor(rep("2-Moderate",4)),
tenencia_de_la_vivienda_mod= factor(rep("Stays temporarily with a relative",4)),
num_otras_sus_mod= factor(rep("One additional substance",4)),
numero_de_hijos_mod_rec= factor(rep("Yes",4)),
tipo_de_plan_res= factor(rep("Outpatient",4))
)
haz_covs1 <- summary(flexsurv_1, t= newtime, newdata = newdat, type = "hazard", tidy = TRUE) %>%
dplyr::mutate(int=factor(paste0(tipo_de_programa_2, " - ", comp_status))) %>%
dplyr::mutate(tooltip= paste0("Days after finishing treatments: ", round(time*365.25),0))
haz_covs2 <- summary(flexsurv_1, t= newtime, newdata = newdat2, type = "hazard", tidy = TRUE) %>%
dplyr::mutate(int=factor(paste0(tipo_de_programa_2, " - ", comp_status))) %>%
dplyr::mutate(tooltip= paste0("Days after finishing treatments: ", round(time*365.25),0))
plot_haz_covs<-
ggplot(haz_covs1, aes(x = time, y = est, col = int)) + #[c(2,66),]
geom_line() +
xlab("Days") + ylab("Hazard") +
scale_color_discrete(name = "") +
theme(legend.position = "bottom")+
sjPlot::theme_sjplot()
#+ xlim(c(.1,10))+ ylim(c(0,.2))
plot_haz_covs2<-
ggplot(haz_covs1) + #[c(2,66),]
geom_line_interactive(aes(x = time, y = est, col = int, group=int, tooltip=tooltip)) +
geom_ribbon_interactive(aes(x=time, ymin=lcl, ymax=ucl, fill= int, group=int),alpha=.3)+
xlab("Time (in years)") + ylab("Hazard") +
theme(legend.position = "bottom")+
scale_fill_manual(name="Terms", values = c("gray70","#B89673","#A0A36D","#886894","darkorchid4","#496A72","gray70","#496A72")) +
scale_color_manual(name="Terms", values = c("gray70","#B89673","#A0A36D","#886894","darkorchid4","#496A72","#496A72")) +
sjPlot::theme_sjplot()+
theme(legend.position="bottom",
legend.title=element_text(size=9),
legend.text=element_text(size=7))+
guides(color=guide_legend(ncol=2),fill=guide_legend(ncol=2))
tooltip_css <- "background-color:gray;color:white;font-style:italic;padding:10px;border-radius:10px 20px 10px 20px;"
ggiraph(code = {print(plot_haz_covs2)}, tooltip_extra_css = tooltip_css, tooltip_opacity = .75)
Competing risks
We estimated the cumulative incidence (risk over time) of each outcome of treatment type in the presence of the other event types.
time_aft_inc_rate<-Sys.time()
paste0("Time taken in process: ");time_aft_inc_rate-time_bef_inc_rate
## [1] "Time taken in process: "
## Time difference of 11.38845 secs
#############################################################################
# #
# CUMULATIVE INCIDENCE CURVES IN R #
# #
# Written by Luca Scrucca #
# #
# Reference: #
# Scrucca L., Santucci A., Aversa F. (2007) Competing risks analysis using #
# R: an easy guide for clinicians. Bone Marrow Transplantation, 40, #
# 381--387. #
#############################################################################
# ver. 1.2 Apr 2018
# - improved format for p-value
# ver. 1.1 Feb 2008
# - allow group to be missing
# - if t is provided both computation and plots use t as time points
# - allow col, lwd to be used for curves with confidence bands
# - fix some bugs in the legend
# - added help on source code
# ver. 1.0 May 2007
# - Version appearing in the BMT paper
#############################################################################
#
# Usage:
#
# CumIncidence(ftime, fstatus, group, t, strata, rho = 0, cencode = 0,
# subset, na.action = na.omit, level,
# xlab = "Time", ylab = "Probability",
# col, lty, lwd, digits = 4)
#
# Arguments:
#
# ftime = failure time variable.
# fstatus = variable with distinct codes for different causes of
# failure and also a distinct code for censored observations.
# group = estimates will be calculated within groups given by distinct
# values of this variable. Tests will compare these groups. If
# missing then treated as all one group (no test statistics).
# t = a vector of time points where the cumulative incidence function
# should be evaluated.
# strata = stratification variable. Has no effect on estimates. Tests
# will be stratified on this variable. (all data in 1 stratum,
# if missing).
# rho = power of the weight function used in the tests. By default is
# set to 0.
# cencode = value of fstatus variable which indicates the failure time
# is censored.
# subset = a logical vector specifying a subset of cases to include in
# the analysis.
# na.action = a function specifying the action to take for any cases
# missing any of ftime, fstatus, group, strata, or subset.
# By default missing cases are omitted.
# level = a value in the range [0,1] specifying the level for pointwise
# confidence bands.
# xlab = text for the x-axis label.
# ylab = text for the y-axis label.
# col = color(s) used for plotting curves (see plot.default).
# lty = line type(s) used for plotting curves (see plot.default).
# lwd = line width(s) used for plotting curves (see plot.default).
# digits = number of significant digits used for printing values. By
# default set at 4.
#
#############################################################################
"CumIncidence" <- function(ftime, fstatus, group, t, strata, rho = 0,
cencode = 0, subset, na.action = na.omit, level,
xlab = "Time", ylab = "Probability",
col, lty, lwd, digits = 4)
{
# check for the required package
if(!require("cmprsk"))
{ stop("Package `cmprsk' is required and must be installed.\n
See help(install.packages) or write the following command at prompt
and then follow the instructions:\n
> install.packages(\"cmprsk\")") }
# collect data
mf <- match.call(expand.dots = FALSE)
mf[[1]] <- as.name("list")
mf$t <- mf$digits <- mf$col <- mf$lty <- mf$lwd <- mf$level <-
mf$xlab <- mf$ylab <- NULL
mf <- eval(mf, parent.frame())
g <- max(1, length(unique(mf$group)))
s <- length(unique(mf$fstatus))
if(missing(t))
{ time <- pretty(c(0, max(mf$ftime)), 6)
ttime <- time <- time[time < max(mf$ftime)] }
else { ttime <- time <- t }
# fit model and estimates at time points
fit <- do.call("cuminc", mf)
tfit <- timepoints(fit, time)
# print result
cat("\n+", paste(rep("-", 67), collapse=""), "+", sep ="")
cat("\n| Cumulative incidence function estimates from competing risks data |")
cat("\n+", paste(rep("-", 67), collapse=""), "+\n", sep ="")
tests <- NULL
if(g > 1)
{
tests <- data.frame(fit$Tests[,c(1,3,2)], check.names = FALSE)
colnames(tests) <- c("Statistic", "df", "p-value")
tests$`p-value` <- format.pval(tests$`p-value`)
cat("Test equality across groups:\n")
print(tests, digits = digits)
}
cat("\nEstimates at time points:\n")
print(tfit$est, digits = digits)
cat("\nStandard errors:\n")
print(sqrt(tfit$var), digits = digits)
#
if(missing(level))
{ # plot cumulative incidence functions
if(missing(t))
{ time <- sort(unique(c(ftime, time)))
x <- timepoints(fit, time) }
else x <- tfit
col <- if(missing(col)) rep(1:(s-1), rep(g,(s-1))) else col
lty <- if(missing(lty)) rep(1:g, s-1) else lty
lwd <- if(missing(lwd)) rep(1, g*(s-1)) else lwd
# matplot(time, base::t(x$est), type="s", ylim = c(0,1),
# xlab = xlab, ylab = ylab, xaxs="i", yaxs="i",
# col = col, lty = lty, lwd = lwd)
# legend("topleft", legend = rownames(x$est), x.intersp = 2,
# bty = "n", xjust = 1, col = col, lty = lty, lwd = lwd)
out <- list(test = tests, est = tfit$est, se = sqrt(tfit$var))
}
else
{ if(level < 0 | level > 1)
error("level must be a value in the range [0,1]")
# compute pointwise confidence intervals
oldpar <- par(ask=TRUE)
on.exit(par(oldpar))
if(missing(t))
{ time <- sort(unique(c(ftime, time)))
x <- timepoints(fit, time) }
else x <- tfit
z <- qnorm(1-(1-level)/2)
lower <- x$est ^ exp(-z*sqrt(x$var)/(x$est*log(x$est)))
upper <- x$est ^ exp(z*sqrt(x$var)/(x$est*log(x$est)))
col <- if(missing(col)) rep(1:(s-1), rep(g,(s-1)))
else rep(col, g*(s-1))
lwd <- if(missing(lwd)) rep(1, g*(s-1))
else rep(lwd, g*(s-1))
# plot pointwise confidence intervals
# for(j in 1:nrow(x$est))
# { matplot(time, cbind(x$est[j,], lower[j,], upper[j,]), type="s",
# xlab = xlab, ylab = ylab, xaxs="i", yaxs="i",
# ylim = c(0,1), col = col[j], lwd = lwd[j], lty = c(1,3,3))
# legend("topleft", legend = rownames(x$est)[j], bty = "n", xjust = 1) }
# print pointwise confidence intervals
i <- match(ttime, time)
ci <- array(NA, c(2, length(i), nrow(lower)))
ci[1,,] <- base::t(lower[,i])
ci[2,,] <- base::t(upper[,i])
dimnames(ci) <- list(c("lower", "upper"), ttime, rownames(lower))
cat(paste("\n", level*100, "% pointwise confidence intervals:\n\n", sep=""))
print(ci, digits = digits)
out <- list(test = tests, est = x$est, se = sqrt(tfit$var), ci = ci)
}
# return results
invisible(out)
}
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
## Calculate the grouped cumulative incidence functions (CIF)
cuminc_comp_status_0<-
CumIncidence (CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$time_to_outcome,
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$comp_status,
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$tipo_de_programa_2,
cencode = "Censored",
xlab="Time (in years)",
level=.95)
##
## +-------------------------------------------------------------------+
## | Cumulative incidence function estimates from competing risks data |
## +-------------------------------------------------------------------+
## Test equality across groups:
## Statistic df p-value
## Therapeutic discharge 3.692 1 0.05469
## Discharge without clinical advice 8.686 1 0.00321
##
## Estimates at time points:
## 0 2 4
## General population Therapeutic discharge 0.0012141 0.2451 0.2699
## Women specific Therapeutic discharge 0.0000000 0.2598 0.2821
## General population Discharge without clinical advice 0.0007588 0.5855 0.5976
## Women specific Discharge without clinical advice 0.0004879 0.5805 0.5869
## 6 8 10
## General population Therapeutic discharge 0.2701 0.2701 0.2701
## Women specific Therapeutic discharge 0.2824 0.2824 0.2824
## General population Discharge without clinical advice 0.5976 0.5976 0.5976
## Women specific Discharge without clinical advice 0.5869 0.5869 0.5869
##
## Standard errors:
## 0 2
## General population Therapeutic discharge 0.0003033 0.003963
## Women specific Therapeutic discharge 0.0000000 0.005132
## General population Discharge without clinical advice 0.0002399 0.004471
## Women specific Discharge without clinical advice 0.0002439 0.005657
## 4 6 8
## General population Therapeutic discharge 0.004127 0.004128 0.004128
## Women specific Therapeutic discharge 0.005317 0.005323 0.005323
## General population Discharge without clinical advice 0.004478 0.004478 0.004478
## Women specific Discharge without clinical advice 0.005670 0.005670 0.005670
## 10
## General population Therapeutic discharge 0.004128
## Women specific Therapeutic discharge 0.005323
## General population Discharge without clinical advice 0.004478
## Women specific Discharge without clinical advice 0.005670
##
## 95% pointwise confidence intervals:
##
## , , General population Therapeutic discharge
##
## 0 2 4 6 8 10
## lower 0.0007305 0.2374 0.2619 0.2620 0.2620 0.2620
## upper 0.0019469 0.2529 0.2780 0.2782 0.2782 0.2782
##
## , , Women specific Therapeutic discharge
##
## 0 2 4 6 8 10
## lower NaN 0.2498 0.2717 0.2720 0.2720 0.2720
## upper NaN 0.2699 0.2926 0.2929 0.2929 0.2929
##
## , , General population Discharge without clinical advice
##
## 0 2 4 6 8 10
## lower 0.0003973 0.5766 0.5887 0.5887 0.5887 0.5887
## upper 0.0013738 0.5942 0.6063 0.6063 0.6063 0.6063
##
## , , Women specific Discharge without clinical advice
##
## 0 2 4 6 8 10
## lower 0.0001715 0.5693 0.5757 0.5757 0.5757 0.5757
## upper 0.0012235 0.5915 0.5979 0.5979 0.5979 0.5979
cuminc_comp_status<-
cuminc(ftime= CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$time_to_outcome,
fstatus= CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$comp_status,
group=CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$tipo_de_programa_2,
cencode = "Censored")
## CIF and the variance of point estimates
timepoints_cuminc_comp<-timepoints(cuminc_comp_status, times = seq(0,10,2))
timepoints_cuminc_comp_table<-
timepoints_cuminc_comp$est %>%
data.table(keep.rownames = T) %>%
dplyr::left_join(data.table(timepoints_cuminc_comp$var,keep.rownames = T), by="rn") %>%
melt() %>%
separate("variable",c("var","est")) %>%
reshape(idvar= c("rn", "var"),timevar="est", direction="wide") %>%
dplyr::mutate(value=paste0(sprintf("%.2f",value.x)," (",sprintf("%.2f",value.y),")"))
#plot(cuminc_comp_status, col = 1:4, lwd = 3, lty = 1, xlab = "Time (in years)")
timepoints_cuminc_comp_table[,c(1,2,5)] %>%
reshape(idvar= c("rn"),timevar="var", direction="wide") %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 6. CIF and variance of point estimates",
align= c("l",rep('c', 6)), col.names = c("Group & Outcome","0","2", "4","6","8","10"))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size=11)%>%
kableExtra::scroll_box(width= "100%", height = "250x")
Table 6. CIF and variance of point estimates
|
Group & Outcome
|
0
|
2
|
4
|
6
|
8
|
10
|
|
General population Therapeutic discharge
|
0.00 (0.00)
|
0.25 (0.00)
|
0.27 (0.00)
|
0.27 (0.00)
|
0.27 (0.00)
|
0.27 (0.00)
|
|
Women specific Therapeutic discharge
|
0.00 (0.00)
|
0.26 (0.00)
|
0.28 (0.00)
|
0.28 (0.00)
|
0.28 (0.00)
|
0.28 (0.00)
|
|
General population Discharge without clinical advice
|
0.00 (0.00)
|
0.59 (0.00)
|
0.60 (0.00)
|
0.60 (0.00)
|
0.60 (0.00)
|
0.60 (0.00)
|
|
Women specific Discharge without clinical advice
|
0.00 (0.00)
|
0.58 (0.00)
|
0.59 (0.00)
|
0.59 (0.00)
|
0.59 (0.00)
|
0.59 (0.00)
|
#at year 10, the estimated marginal probability of discharge w/o clinical advice was 59%
plot_incidence_cum<-
as.data.table(cuminc_comp_status_0$ci) %>%
data.frame() %>%
reshape(idvar= c("V2", "V3"),timevar="V1", direction="wide") %>%
dplyr::mutate(V2=as.numeric(V2)) %>%
ggplot()+
geom_ribbon(aes(x=V2, ymin = value.lower, ymax = value.upper, fill=V3),alpha=.4)+
sjPlot::theme_sjplot()+
ylim(0,1)+
guides(fill=guide_legend(ncol=2))+
scale_y_continuous(breaks = seq(0,.65, .1),
labels = scales::percent,
limits = c(0, .65)) +
theme(legend.position="bottom")+
scale_fill_manual(name="Type of\nprogram &\nOutcome",values=c("cornflowerblue","brown3","bisque4","darkolivegreen"))+
labs(x="Time (in years)",y="Probability")
plot_incidence_cum
if(no_mostrar==1){
jpeg("C:/Users/andre/Desktop/SUD_CL/eso7.jpg", height=10, width= 10, res= 96, units = "in")
plot_incidence_cum
dev.off()
}
time_bef_cmprsk<-Sys.time()
First, the print out shows the results of Gray’s test for equality of CIFs across groups. In our example, cumulative incidence curves for types of programs are not statistically different for Therapeutic discharge (3.69(1)= 0.055), but women in WE programs seem to be more likely to be Discharged without clinical advice than women in GP (8.69(1)= 0.003).
#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:350px; overflow-x: scroll; width:100%">
## Regression (crr can only take a covariate matrix)
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5326634/
# Para ver residuos
# https://www.nature.com/articles/bmt2009359.pdf?origin=ppub
covars <- model.matrix(~edad_al_ing_grupos+ escolaridad_rec+ sus_principal_mod+
freq_cons_sus_prin+ compromiso_biopsicosocial+ tenencia_de_la_vivienda_mod+
num_otras_sus_mod+ numero_de_hijos_mod_rec+ factor(tipo_de_programa_2)+ tipo_de_plan_res, CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr)[,-1]
bmtDisMat <- matrix(
cbind(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$tipo_de_programa_2,
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$tipo_de_plan_res,
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$estado_conyugal_2,
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$edad_al_ing_grupos,
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$escolaridad_rec,
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$sus_principal_mod,
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$freq_cons_sus_prin,
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$compromiso_biopsicosocial,
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$tenencia_de_la_vivienda_mod,
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$num_otras_sus_mod,
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$numero_de_hijos_mod_rec
),ncol=11)
colnames(bmtDisMat) <- c("program","plan","marital status","age at admission","educational attainment","substance","frequency of subs use","biopsychosoc comp", "housing cond", "other subs", "kids")
#"estado_conyugal_2","edad_al_ing_grupos","escolaridad_rec","sus_principal_mod", "freq_cons_sus_prin","compromiso_biopsicosocial","tenencia_de_la_vivienda_mod","num_otras_sus_mod","numero_de_hijos_mod_rec"
resCrrRelByDis <- cmprsk::crr(ftime= CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$time_to_outcome, # vector of failure/censoring times
fstatus= CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_irr$comp_status, # vector with a unique code for each failure type and censoring
cov1= covars, # matrix (nobs x ncovs) of fixed covariates
## cov2 = , # matrix of covariates that will be multiplied by functions of time
## tf = , # functions of time
## cengroup = , # vector with different values for each group with a distinct censoring distribution
failcode = "Therapeutic discharge", # code of fstatus that denotes the failure type of interest
cencode = "Censored" # code of fstatus that denotes censored observations
)
#~15 min. in a DELL 3ghz 32gb ram
#The first argument to the function wald.test() is the estimated covariance matrix for the coefficients, followed
#by the vector of coefficients estimates, and the position of coefficients for which we want to assess significance
#aod::wald.test(resCrrRelByDis$var, resCrrRelByDis$coef, Terms = 1:3)
#aod::wald.test(resCrrRelByDis$var, resCrrRelByDis$coef, Terms = 6:9)
#aod::wald.test(resCrrRelByDis$var, resCrrRelByDis$coef, Terms = 10:13)
#aod::wald.test(resCrrRelByDis$var, resCrrRelByDis$coef, Terms = 16:19) #no diffs in tennence in every levels
#
#Phase is a factor with relapse as baseline, so each P-value provides a test for the difference
#of each level with respect to the baseline. An overall Pvalue for Phase (the overall P-value is always required
#when modeling a factor with more than two levels), can be obtained through the Wald test.
# ESTO PERMITE VER SI UNA VRIABLE FACTOR TIENE DIFERENCIAS EN TODOS SUS NIVELES
data.table::data.table(summary(resCrrRelByDis)$conf.int, keep.rownames = T) %>%
dplyr::left_join(dt_coefs_simple_surv, by=c("rn"="real_vars"))%>%
dplyr::mutate(formal_vars= ifelse(rn=="factor(tipo_de_programa_2)Women specific","Type of Program- Women specific",formal_vars))%>%
dplyr::mutate(conf.low=sprintf("%1.2f",`2.5%`),
conf.high=sprintf("%1.2f",`97.5%`),
`exp(coef)`=sprintf("%1.2f",`exp(coef)`),
`exp(-coef)`=sprintf("%1.2f",`exp(-coef)`),
`CI 95%`=paste0(conf.low,", ",conf.high)) %>%
dplyr::filter(!rn %in% c("shape","scale")) %>%
dplyr::select(formal_vars, `exp(coef)`, `exp(-coef)`, `CI 95%`) %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption=paste0("Table 7. Competing Risks (Treatment Outcomes, Failure Type of Interest= Therapeutic Discharge) Regression Coefficients"),
col.names = c("Term","Hazard", "Inv. Hazard", "CI 95%"),
align= c("l",rep('c', 6)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size=11)%>%
kableExtra::add_footnote(note_flexsurv_1, notation = "none") %>%
kableExtra::scroll_box(width = "100%", height = "340px")
Table 7. Competing Risks (Treatment Outcomes, Failure Type of Interest= Therapeutic Discharge) Regression Coefficients
|
Term
|
Hazard
|
Inv. Hazard
|
CI 95%
|
|
Age at admission to treatment, grouped- 30-39
|
1.50
|
0.67
|
1.40, 1.61
|
|
Age at admission to treatment, grouped- 40-49
|
1.02
|
0.98
|
0.96, 1.08
|
|
Age at admission to treatment, grouped- 50+
|
1.01
|
0.99
|
0.96, 1.07
|
|
Ed. Attainment- Completed high school or less
|
1.36
|
0.74
|
1.28, 1.44
|
|
Ed. Attainment- More than high school
|
0.99
|
1.01
|
0.95, 1.04
|
|
Primary or main substance- Cocaine hydrochloride
|
0.72
|
1.39
|
0.66, 0.78
|
|
Primary or main substance- Cocaine paste
|
0.62
|
1.62
|
0.57, 0.67
|
|
Primary or main substance- Marijuana
|
0.92
|
1.09
|
0.82, 1.04
|
|
Primary or main substance- Other
|
1.05
|
0.95
|
0.93, 1.18
|
|
Consumption frequency of primary or main substance- 2 to 3 days a week
|
0.86
|
1.16
|
0.79, 0.94
|
|
Consumption frequency of primary or main substance- 4 to 6 days a week
|
1.05
|
0.95
|
0.97, 1.14
|
|
Consumption frequency of primary or main substance- 1 day a week or more
|
0.86
|
1.16
|
0.79, 0.93
|
|
Consumption frequency of primary or main substance- Daily
|
0.94
|
1.07
|
0.87, 1.01
|
|
Biopsychosocial involvement- 2-Moderate
|
0.85
|
1.18
|
0.78, 0.92
|
|
Biopsychosocial involvement- 3-Severe
|
0.97
|
1.03
|
0.92, 1.03
|
|
Tenure status of households- Others
|
0.95
|
1.05
|
0.70, 1.30
|
|
Tenure status of households- Owner/Transferred dwellings/Pays Dividends
|
1.08
|
0.93
|
0.84, 1.39
|
|
Tenure status of households- Renting
|
0.99
|
1.01
|
0.77, 1.29
|
|
Tenure status of households- Stays temporarily with a relative
|
1.01
|
0.99
|
0.79, 1.31
|
|
Co-occurring SUD- One additional substance
|
0.90
|
1.11
|
0.86, 0.95
|
|
Co-occurring SUD- More than one additional substance
|
1.06
|
0.94
|
1.02, 1.11
|
|
Have children (Dichotomized)- Yes
|
0.89
|
1.12
|
0.82, 0.97
|
|
Type of Program- Women specific
|
1.22
|
0.82
|
1.15, 1.30
|
|
Setting of Treatment- Residential
|
1.19
|
0.84
|
1.10, 1.30
|
|
N= 17,110; Events= 4,659; Censored= 12,451; Time at risk= 59,988.58; Df= 28; AIC= 29,043.31
|
#resCrrRelByDis$loglik.null
#resCrrRelByDis$loglik
#inverse hazard ratio
#tidy(resCrrRelByDis)
#MODEL SELECTION
#modsel.crr(mod1, mod2, mod3, mod4, mod5, mod6)
#Hacer distintos modelos y compararlos
#</div>
time_aft_cmprsk<-Sys.time()
#https://rpubs.com/kaz_yos/cmprsk2
paste0("Time taken in process: ");time_aft_cmprsk-time_bef_cmprsk
## [1] "Time taken in process: "
## Time difference of 17.7861 mins
par(mfrow = c(4,6))
for(j in 1:ncol(resCrrRelByDis$res))
scatter.smooth(resCrrRelByDis$uft,
resCrrRelByDis$res[,j],
main = names(resCrrRelByDis$coef)[j],
xlab = "Failure time",
ylab = "Schoenfeld residuals")
#Schoenfeld residuals against failure time for each covariate. It is noted that the residuals follows a non-constant distribution across failure times, indicating a potential violation to the proportional assumption.
#the PH assumption can be assessed by fitting a LOESS curve to the plot. A straight line passing through a residual value of 0 with gradient 0 indicates that the variable satisfies the PH assumption and therefore does not depend on time.
Multi-state
Transition matrix
The model schematic illustrates treatment progression and the possible transitions between the states and the model structure.
library(igraph)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
## 3 ESTADOS SIMPLES ##
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
links<-data.frame(stringsAsFactors=FALSE,
from = c(rep("Admitted to\nTreatment", 2),"Therapeutic\nDischarge"),
to = c(rep(c("Therapeutic\nDischarge"),1), rep("Readmission", 2)),
value = c("1","2","3"))
links2<-data.frame(stringsAsFactors=FALSE,
from = c(rep("Admitted to\nTreatment", 3),"Therapeutic\nDischarge","Discharge w/o\nClinical Advice"),
to = c(rep(c("Therapeutic\nDischarge","Discharge w/o\nClinical Advice"),1), rep("Readmission", 3)),
value = c("1","2","3","4","5"))
#dev.off()
#https://www.r-graph-gallery.com/248-igraph-plotting-parameters.html
#https://rstudio-pubs-static.s3.amazonaws.com/341807_7b9d1a4e787146d492115f90ad75cd2d.html
par(mfrow=c(1, 2))
#for (i in c(2560:2660)){
set.seed(2630) #i #2660 #2646 #2642 #2630 #2650
co <- layout.fruchterman.reingold(graph_from_data_frame(links, directed=TRUE))
plot(graph_from_data_frame(links, directed=TRUE),
asp = 0,
layout= co,
edge.label=links$value,
edge.label.cex=3,
edge.label.color="black",
#vertex.label= rev(),
vertex.color="white",
vertex.size=120,
vertex.size2=25,
vertex.label.cex=1,
edge.arrow.size=1,
edge.color="black",
vertex.shape="rectangle",
vertex.label.color="black",
edge.curved=0,
edge.width=1.5,
#main=paste0(i),
outputorder="edgesfirst",
vertex.label.dist=0,
vertex.cex = 3)
#}
title("a) Three-states Model (Simplest)", sub = "No recurring states; Absorving state: Readmission;\nOther causes of discharge were not events of interest;\nModified version of an illness-death model") ## internal titles
set.seed(10990)#i #10990 10921 10898 10835
co2 <- layout.fruchterman.reingold(graph_from_data_frame(links2, directed=TRUE))
plot(graph_from_data_frame(links2, directed=TRUE),
asp = 0,
layout= co2,
edge.label=links2$value,
edge.label.cex=2.5,
edge.label.color="black",
#vertex.label= rev(),
vertex.color="white",
vertex.size=120,
vertex.size2=25,
vertex.label.cex=1,
edge.arrow.size=1,
edge.color="black",
vertex.shape="rectangle",
vertex.label.color="black",
edge.curved=0,
edge.width=1.5,
outputorder="edgesfirst",
#main=paste0(i),
vertex.label.dist=0,
vertex.cex = 3)
title("b) Four-states Model", sub = "No recurring states; Absorving state: Readmission;\nOther causes of discharge were not events of interest") ## internal titles

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
## Probando con paquetes estadísticos
if(no_mostrar==1){
library(igraph)
Nodes <- c("Admitted to\nGP","Admitted to\nWE","Therapeutic\nDischarge","Discharge w/o\nClinical Advice","Readmission") #states possible in MSM
Edges <- list("Admitted to\nGP"=list(edges=c("Therapeutic\nDischarge","Discharge w/o\nClinical Advice")),
"Admitted to\nWE"=list(edges=c("Therapeutic\nDischarge","Discharge w/o\nClinical Advice")),
"Therapeutic\nDischarge"=list(edges=c("Readmission")),
"Discharge /wo\nClinical Advice"=list(edges=c("Readmission")),
"Readmission"=list(edges=NULL)) #transitions from each state
RCLTtree <- new("graphNEL",nodes=Nodes,edgeL=Edges,edgemode="directed")
plot(RCLTtree)
#https://www.rdocumentation.org/packages/msSurv/versions/1.2-2/topics/msSurv
msSurv(LTRCdata, RCLTtree, cens.type="ind", LT=FALSE, bs=FALSE, B=200)
}
Any observation where an event occurs which is not the event of interest for a specific transition is treated as a censored at the end of the study (2019, November 13) observation (only referrals and also had inexact dates of discharge), that is, in the same way as a patient that was lost to follow-up. If there is a readmission posterior to loss-to follow-up cases, these cases we obtained the length in days between being readmitted posterior and the time of admission, knowing that any intermediate date of discharge was interval-censored (the exact time in which users discharged is unknown, and its treatment outcomes are unknown, so there is no exact time-to-readmission). Covariates are fixed at baseline. Some transitions were shown to be simultaneous (n= 132). Small adjustment such that transitions were sequential rather than simultaneous by adding one day to the absorbing event, and subtracting a day if the transition corresponded to an intermediate state.
#Data should be in a data frame with column names "id", "stop", "st.stage", and "stage" where "id" is the individual's identification number, "stop" is the transition time from state j to j', "st.stage" is the state the individual is transitioning from (i.e., j), and "stage" is the state the individual is transitioning to (i.e., j') and equals 0 if right censored.
## 80% of sample is LT, rest has start time of 0
### AGS: Parten en 0, salvo que estén truncados a la izquierda.
### Parece que todos comparten un mismo tiempo ojetivo.
### AGS: Cuando hay un estado seguido no es necesario interval censoring, se dn en tun tiempo continuo
### El 0 es censura
library(mstate)
#### MSPREP= 3 STATES
#_#_#_#_#_#_#_
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep<-
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados %>%
# 2021-03-24: Eliminate cases with readmissions posterior to interval-censored discharges
#dplyr::mutate(cens=ifelse(motivodeegreso_mod_imp=="Referral to another treatment"& !is.na(fech_ing_next_treat),1,0)) %>% dplyr::filter(cens==0) %>%
dplyr::mutate(ther_disch=ifelse(motivodeegreso_mod_imp=="Therapeutic discharge",1,0)) %>%
# para problema para intervalos 0 entre un tratamiento nuevo y otro y entre el ingreso a un tratamiento y su término y que comento más abajo
dplyr::mutate(cambio_fecha_ing= dplyr::case_when(dias_treat_imp_sin_na==0~1 ,T~0)) %>%
dplyr::mutate(fech_ing_num= dplyr::case_when(cambio_fecha_ing==1~fech_ing_num-1,T~fech_ing_num)) %>%
dplyr::mutate(dias_treat_imp_sin_na= dplyr::case_when(cambio_fecha_ing==1~fech_egres_num-fech_ing_num, T~dias_treat_imp_sin_na)) %>%
dplyr::select(-cambio_fecha_ing) %>%
#If status=1, the corresponding transition has been observed. Si no no se ha observado
# para efectos de este modelo simple, experimentar el alta terapéutica es el objetivo, por lo que el resto será censura
dplyr::mutate(readmission=ifelse(!is.na(fech_ing_next_treat),1,0)) %>%
#time of arrival at the state
dplyr::mutate(diff_bet_treat=fech_ing_next_treat-fech_egres_num) %>%
# para problema para intervalos 0 entre un tratamiento nuevo y otro y entre el ingreso a un tratamiento y su término y que comento más abajo. Le añado un día
dplyr::mutate(cambio_fecha_ing_nuevo_t= dplyr::case_when(diff_bet_treat==0~1 ,T~0)) %>%
dplyr::mutate(fech_ing_next_treat= dplyr::case_when(cambio_fecha_ing_nuevo_t==1~fech_ing_next_treat+1,T~fech_ing_next_treat)) %>%
dplyr::mutate(diff_bet_treat= dplyr::case_when(cambio_fecha_ing_nuevo_t==1~fech_ing_next_treat-fech_egres_num, T~diff_bet_treat)) %>%
dplyr::select(-cambio_fecha_ing_nuevo_t) %>%
#dplyr::filter(diff_bet_treat_corr!=diff_bet_treat)
#ADD DATES TO THE FINAL FOLLOW UP OF THE STUDY IF CENSORED
dplyr::mutate(dias_treat_imp_sin_na= dplyr::case_when(ther_disch==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num,T~dias_treat_imp_sin_na)) %>%
## 2021-03-24, I had to reespecify times to objective times, in order to avoid further problems
dplyr::mutate(date_ther_disch= dias_treat_imp_sin_na, #fech_ing_num+
## 2021-03-24, The posterior treatment has to include the days in the previous treatment
date_post_treat= dplyr::case_when(ther_disch==1 & readmission==1~ dias_treat_imp_sin_na+ diff_bet_treat,
#if the first time is censored,
ther_disch==0 & readmission==1~ fech_ing_next_treat-fech_ing_num,
readmission==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num)) %>%
## EL RESTO DE LOS PACIENTES NO VAN A HABER REGISTRADO EVENTOS EN ESE TIEMPO, POR LO QUE LLEGARÁN AL FINAL DEL SEGUIMIENTO
dplyr::select(row, fech_ing_num, date_ther_disch, ther_disch, date_post_treat, readmission, tipo_de_programa_2:numero_de_hijos_mod_rec,tipo_de_plan_res) %>%
as.data.frame()
#### MSPREP= 4 STATES
#_#_#_#_#_#_#_
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep2<-
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados %>%
# 2021-03-24: Eliminate cases with readmissions posterior to interval-censored discharges
#dplyr::mutate(cens=ifelse(motivodeegreso_mod_imp=="Referral to another treatment"& !is.na(fech_ing_next_treat),1,0)) %>% dplyr::filter(cens==0) %>%
dplyr::mutate(ther_disch=ifelse(motivodeegreso_mod_imp=="Therapeutic discharge",1,0)) %>%
# For problems with 0 intervals between a new treatment and other between the admission and discharge, I subtract 1 day of admission. Then, and if there is a change in the date, we replace it. If not, we maintain the original value
dplyr::mutate(cambio_fecha_ing= dplyr::case_when(dias_treat_imp_sin_na==0~1 ,T~0)) %>%
dplyr::mutate(fech_ing_num= dplyr::case_when(cambio_fecha_ing==1~fech_ing_num-1,T~fech_ing_num)) %>%
dplyr::mutate(dias_treat_imp_sin_na= dplyr::case_when(cambio_fecha_ing==1~fech_egres_num-fech_ing_num, T~dias_treat_imp_sin_na)) %>%
dplyr::select(-cambio_fecha_ing) %>%
#If status=1, the corresponding transition has been observed. If not, it assumes not observed
dplyr::mutate(disch_wo_clin_adv=ifelse(motivodeegreso_mod_imp %in% c("Early Drop-out","Late Drop-out","Administrative discharge"),1,0)) %>%
dplyr::mutate(readmission=ifelse(!is.na(fech_ing_next_treat),1,0)) %>%
#time of arrival at the state of readmission
dplyr::mutate(diff_bet_treat=fech_ing_next_treat-fech_egres_num) %>%
# For problems with 0 intervals between a new treatment and other between the admission and discharge, I add 1 day of admission to the next treatment. Then, and if there is a change in the date, we replace it. If not, we maintain the original value
dplyr::mutate(cambio_fecha_ing_nuevo_t= dplyr::case_when(diff_bet_treat==0~1 ,T~0)) %>%
dplyr::mutate(fech_ing_next_treat= dplyr::case_when(cambio_fecha_ing_nuevo_t==1~fech_ing_next_treat+1,T~fech_ing_next_treat)) %>%
dplyr::mutate(diff_bet_treat= dplyr::case_when(cambio_fecha_ing_nuevo_t==1~fech_ing_next_treat-fech_egres_num, T~diff_bet_treat)) %>%
dplyr::select(-cambio_fecha_ing_nuevo_t) %>%
#dplyr::filter(diff_bet_treat_corr!=diff_bet_treat)
#ADD DATES TO THE FINAL FOLLOW UP OF THE STUDY IF CENSORED
dplyr::mutate(days_to_ther_disch= dplyr::case_when(ther_disch==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num,T~dias_treat_imp_sin_na)) %>%
dplyr::mutate(days_to_disch_wo_clin_adv= dplyr::case_when(disch_wo_clin_adv==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num,T~dias_treat_imp_sin_na)) %>%
## 2021-03-24, I had to reespecify times to objective times, in order to avoid further problems
dplyr::mutate(date_ther_disch= days_to_ther_disch, #fech_ing_num+
date_disch_wo_clin_adv= days_to_disch_wo_clin_adv,
## 2021-03-24, The posterior treatment has to include the days in the previous treatment
date_post_treat= dplyr::case_when(ther_disch==1 & readmission==1~ days_to_ther_disch+ diff_bet_treat,
disch_wo_clin_adv==1 & readmission==1~ days_to_disch_wo_clin_adv+ diff_bet_treat,
#if the first time is censored,
disch_wo_clin_adv==0 & ther_disch==0 & readmission==1~ fech_ing_next_treat-fech_ing_num,
ther_disch==1 & disch_wo_clin_adv==0 & readmission==1~ fech_ing_next_treat-fech_ing_num,
readmission==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num)) %>%
## EL RESTO DE LOS PACIENTES NO VAN A HABER REGISTRADO EVENTOS EN ESE TIEMPO, POR LO QUE LLEGARÁN AL FINAL DEL SEGUIMIENTO
dplyr::select(row, fech_ing_num, date_ther_disch, ther_disch, date_disch_wo_clin_adv, disch_wo_clin_adv, date_post_treat, readmission, tipo_de_programa_2:numero_de_hijos_mod_rec,tipo_de_plan_res) %>%
as.data.frame()
tail(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep) %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 8a. Data in Wide, 3-states",
align= c("c",rep('c', 5)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>%
kableExtra::add_footnote("Note= Proportions from the initial state") %>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 8a. Data in Wide, 3-states
|
|
row
|
fech_ing_num
|
date_ther_disch
|
ther_disch
|
date_post_treat
|
readmission
|
tipo_de_programa_2
|
estado_conyugal_2
|
edad_al_ing_grupos
|
escolaridad_rec
|
sus_principal_mod
|
freq_cons_sus_prin
|
compromiso_biopsicosocial
|
tenencia_de_la_vivienda_mod
|
num_otras_sus_mod
|
numero_de_hijos_mod_rec
|
tipo_de_plan_res
|
|
21373
|
18,172
|
15,187
|
3,026
|
0
|
1,761
|
1
|
General population
|
Single
|
30-39
|
3-Completed primary school or less
|
Cocaine paste
|
Daily
|
2-Moderate
|
Stays temporarily with a relative
|
One additional substance
|
Yes
|
Outpatient
|
|
21374
|
41,467
|
15,873
|
2,340
|
0
|
275
|
1
|
Women specific
|
Married/Shared living arrangements
|
18-29
|
3-Completed primary school or less
|
Cocaine paste
|
Daily
|
2-Moderate
|
Illegal Settlement
|
More than one additional substance
|
Yes
|
Residential
|
|
21375
|
16,343
|
15,114
|
3,099
|
0
|
2,244
|
1
|
Women specific
|
Single
|
18-29
|
2-Completed high school or less
|
Alcohol
|
2 to 3 days a week
|
3-Severe
|
Renting
|
More than one additional substance
|
Yes
|
Residential
|
|
21376
|
139,357
|
17,688
|
525
|
0
|
525
|
0
|
Women specific
|
Single
|
18-29
|
2-Completed high school or less
|
Cocaine paste
|
Daily
|
3-Severe
|
Stays temporarily with a relative
|
More than one additional substance
|
No
|
Residential
|
|
21377
|
24,900
|
15,345
|
349
|
1
|
2,868
|
0
|
Women specific
|
Single
|
18-29
|
1-More than high school
|
Alcohol
|
Daily
|
2-Moderate
|
Owner/Transferred dwellings/Pays Dividends
|
One additional substance
|
Yes
|
Outpatient
|
|
21378
|
41,118
|
15,874
|
2,339
|
0
|
2,339
|
0
|
Women specific
|
Married/Shared living arrangements
|
30-39
|
2-Completed high school or less
|
Cocaine paste
|
Daily
|
3-Severe
|
Stays temporarily with a relative
|
No additional substance
|
Yes
|
Residential
|
|
a Note= Proportions from the initial state
|
tail(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep2) %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 8b. Data in Wide, 4-states",
align= c("c",rep('c', 5)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>%
kableExtra::add_footnote("Note= Proportions from the initial state") %>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 8b. Data in Wide, 4-states
|
|
row
|
fech_ing_num
|
date_ther_disch
|
ther_disch
|
date_disch_wo_clin_adv
|
disch_wo_clin_adv
|
date_post_treat
|
readmission
|
tipo_de_programa_2
|
estado_conyugal_2
|
edad_al_ing_grupos
|
escolaridad_rec
|
sus_principal_mod
|
freq_cons_sus_prin
|
compromiso_biopsicosocial
|
tenencia_de_la_vivienda_mod
|
num_otras_sus_mod
|
numero_de_hijos_mod_rec
|
tipo_de_plan_res
|
|
21373
|
18,172
|
15,187
|
3,026
|
0
|
304
|
1
|
1,761
|
1
|
General population
|
Single
|
30-39
|
3-Completed primary school or less
|
Cocaine paste
|
Daily
|
2-Moderate
|
Stays temporarily with a relative
|
One additional substance
|
Yes
|
Outpatient
|
|
21374
|
41,467
|
15,873
|
2,340
|
0
|
2,340
|
0
|
275
|
1
|
Women specific
|
Married/Shared living arrangements
|
18-29
|
3-Completed primary school or less
|
Cocaine paste
|
Daily
|
2-Moderate
|
Illegal Settlement
|
More than one additional substance
|
Yes
|
Residential
|
|
21375
|
16,343
|
15,114
|
3,099
|
0
|
115
|
1
|
2,244
|
1
|
Women specific
|
Single
|
18-29
|
2-Completed high school or less
|
Alcohol
|
2 to 3 days a week
|
3-Severe
|
Renting
|
More than one additional substance
|
Yes
|
Residential
|
|
21376
|
139,357
|
17,688
|
525
|
0
|
56
|
1
|
525
|
0
|
Women specific
|
Single
|
18-29
|
2-Completed high school or less
|
Cocaine paste
|
Daily
|
3-Severe
|
Stays temporarily with a relative
|
More than one additional substance
|
No
|
Residential
|
|
21377
|
24,900
|
15,345
|
349
|
1
|
2,868
|
0
|
2,868
|
0
|
Women specific
|
Single
|
18-29
|
1-More than high school
|
Alcohol
|
Daily
|
2-Moderate
|
Owner/Transferred dwellings/Pays Dividends
|
One additional substance
|
Yes
|
Outpatient
|
|
21378
|
41,118
|
15,874
|
2,339
|
0
|
38
|
1
|
2,339
|
0
|
Women specific
|
Married/Shared living arrangements
|
30-39
|
2-Completed high school or less
|
Cocaine paste
|
Daily
|
3-Severe
|
Stays temporarily with a relative
|
No additional substance
|
Yes
|
Residential
|
|
a Note= Proportions from the initial state
|
#For censored state transitions it can be awkward having to replicate the censoring time for each non-visited state
#https://github.com/stulacy/multistateutils
mat_3_states <- trans.illdeath(names=c("Admission","Therapeutic\nDischarge","Readmission"))
#All possible paths through the multi-state model can be found here:
#paths(mat_3_states)
#Los números son determinados por posición en cada columna (o eje Y).
#Si uno quiere definir para la otra fila, salta al siguiente vector:
mat_4_states<- transMat(list(c(2,3,4), 4, 4, c()),
names = c("Admission", "Therapeutic\nDischarge", "Discharge Without\nMedical Advice", "Readmission"))
#illness-death model without recovery
ms_CONS_C1_SEP_2020_women_imputed <- msprep(time = c(NA, "date_ther_disch", "date_post_treat"),
status = c(NA, "ther_disch", "readmission"),
data = CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep,
id = "row",
trans = mat_3_states,
keep = c("tipo_de_programa_2","estado_conyugal_2","edad_al_ing_grupos","escolaridad_rec","sus_principal_mod", "freq_cons_sus_prin","compromiso_biopsicosocial","tenencia_de_la_vivienda_mod","num_otras_sus_mod","numero_de_hijos_mod_rec","tipo_de_plan_res"))
#model of 5 states without recovery.
ms2_CONS_C1_SEP_2020_women_imputed <- msprep(time = c(NA, "date_ther_disch",
"date_disch_wo_clin_adv", "date_post_treat"),
status = c(NA, "ther_disch", "disch_wo_clin_adv", "readmission"),
data = CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep2,
id = "row",
trans = mat_4_states,
keep = c("tipo_de_programa_2","estado_conyugal_2","edad_al_ing_grupos","escolaridad_rec","sus_principal_mod", "freq_cons_sus_prin","compromiso_biopsicosocial","tenencia_de_la_vivienda_mod","num_otras_sus_mod","numero_de_hijos_mod_rec","tipo_de_plan_res"))
#Starting from state 1, simultaneous transitions possible for subjects 36666 36586 56465 136847 37595 60609 51706 76376 117544 140210 at times 126 472 32 36 1 203 45 14 5 71; smallest receiving state chosen
invisible(c("This problem responds to differences between treatments 0. Should be resolved in the initial stages"))
if(no_mostrar==1){
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep2 %>%
dplyr::filter(row %in% unlist(
ms2_CONS_C1_SEP_2020_women_imputed %>%
dplyr::filter(Tstop<=Tstart) %>%
dplyr::select(row,from,to,trans,Tstart,Tstop,time,status) %>%
distinct(row)
)) %>%
#dplyr::mutate(diff_bet_treat=fech_ing_next_treat-fech_egres_num)%>%
View()
}
if(no_mostrar==1){
CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados %>%
dplyr::filter(row %in% unlist(
ms2_CONS_C1_SEP_2020_women_imputed %>%
dplyr::filter(Tstop<=Tstart) %>%
dplyr::select(row,from,to,trans,Tstart,Tstop,time,status) %>%
distinct(row)
)) %>%
dplyr::select(row, motivodeegreso_mod_imp, contains("fech"))
}
invisible(c("Investigate warning"))
#37595 Administrative discharge 2013-03-18 2013-03-19 2013-03-19
#
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#SCALE INTERVALS TO YEARS
ms_CONS_C1_SEP_2020_women_imputed[, c("Tstart", "Tstop", "time")] <- ms_CONS_C1_SEP_2020_women_imputed[, c("Tstart", "Tstop", "time")]/365.25
ms2_CONS_C1_SEP_2020_women_imputed[, c("Tstart", "Tstop", "time")] <- ms2_CONS_C1_SEP_2020_women_imputed[, c("Tstart", "Tstop", "time")]/365.25
Then we present the transition with the frequencies and proportions.
data.frame(events(ms_CONS_C1_SEP_2020_women_imputed)$Frequencies) %>%
dplyr::filter(to!="total entering") %>%
left_join(data.frame(events(ms_CONS_C1_SEP_2020_women_imputed)$Proportions), by=c("from", "to")) %>%
dplyr::rename("Frequencies"="Freq.x", "Proportions"="Freq.y") %>%
dplyr::arrange(from, to) %>%
dplyr::mutate(diff=ifelse(as.character(from)!=as.character(to),0,1)) %>%
dplyr::filter(diff==0) %>%
dplyr::select(-diff) %>%
dplyr::mutate(Proportions=scales::percent(Proportions)) %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 9a. Empirical State Transition Matrix, 3 States Model",
align= c("c",rep('c', 5)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>%
kableExtra::add_footnote("Note= Proportions from the initial state") %>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 9a. Empirical State Transition Matrix, 3 States Model
|
from
|
to
|
Frequencies
|
Proportions
|
|
Admission
|
Therapeutic Discharge
|
5,114
|
23.92%
|
|
Admission
|
Readmission
|
4,348
|
20.34%
|
|
Admission
|
no event
|
11,916
|
55.74%
|
|
Therapeutic Discharge
|
Admission
|
0
|
0.00%
|
|
Therapeutic Discharge
|
Readmission
|
1,063
|
20.79%
|
|
Therapeutic Discharge
|
no event
|
4,051
|
79.21%
|
|
Readmission
|
Admission
|
0
|
0.00%
|
|
Readmission
|
Therapeutic Discharge
|
0
|
0.00%
|
|
Readmission
|
no event
|
5,411
|
100.00%
|
|
a Note= Proportions from the initial state
|
data.frame(events(ms2_CONS_C1_SEP_2020_women_imputed)$Frequencies) %>%
dplyr::filter(to!="total entering") %>%
left_join(data.frame(events(ms2_CONS_C1_SEP_2020_women_imputed)$Proportions), by=c("from", "to")) %>%
dplyr::rename("Frequencies"="Freq.x", "Proportions"="Freq.y") %>%
dplyr::arrange(from, to) %>%
dplyr::mutate(diff=ifelse(as.character(from)!=as.character(to),0,1)) %>%
dplyr::filter(diff==0) %>%
dplyr::select(-diff) %>%
dplyr::mutate(Proportions=scales::percent(Proportions)) %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 9b. Empirical State Transition Matrix, 4 States Model",
align= c("c",rep('c', 5)))%>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>%
kableExtra::add_footnote("Note= Proportions from the initial state")%>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 9b. Empirical State Transition Matrix, 4 States Model
|
from
|
to
|
Frequencies
|
Proportions
|
|
Admission
|
Therapeutic Discharge
|
5,114
|
23.9%
|
|
Admission
|
Discharge Without Medical Advice
|
11,996
|
56.1%
|
|
Admission
|
Readmission
|
752
|
3.5%
|
|
Admission
|
no event
|
3,516
|
16.4%
|
|
Therapeutic Discharge
|
Admission
|
0
|
0.0%
|
|
Therapeutic Discharge
|
Discharge Without Medical Advice
|
0
|
0.0%
|
|
Therapeutic Discharge
|
Readmission
|
1,063
|
20.8%
|
|
Therapeutic Discharge
|
no event
|
4,051
|
79.2%
|
|
Discharge Without Medical Advice
|
Admission
|
0
|
0.0%
|
|
Discharge Without Medical Advice
|
Therapeutic Discharge
|
0
|
0.0%
|
|
Discharge Without Medical Advice
|
Readmission
|
3,596
|
30.0%
|
|
Discharge Without Medical Advice
|
no event
|
8,400
|
70.0%
|
|
Readmission
|
Admission
|
0
|
0.0%
|
|
Readmission
|
Therapeutic Discharge
|
0
|
0.0%
|
|
Readmission
|
Discharge Without Medical Advice
|
0
|
0.0%
|
|
Readmission
|
no event
|
5,411
|
100.0%
|
|
a Note= Proportions from the initial state
|
Consideration of the Appropriateness of the proportional hazards assumption
Continuous variables need to be categorised into groups. The plot described is also known as the log(−log(survival)) plot, as the cumulative hazard is equal to the negative logarithm of the survival proportion. This approach requires a subjective assessment (Bradburn, Clark, Love, et al., 2003).
#Bradburn, M., Clark, T., Love, S. et al. Survival Analysis Part III: Multivariate data analysis – choosing a model and assessing its adequacy and fit. Br J Cancer 89, 605–611 (2003). https://doi.org/10.1038/sj.bjc.6601120
#reescribo el tipo de programa para indicar 0 y 1s para tratamiento vs. control
if(dimnames(table(ms_CONS_C1_SEP_2020_women_imputed$tipo_de_programa_2))[[1]][1]=="General population"){
ms_CONS_C1_SEP_2020_women_imputed$tipo_de_programa_2<-ifelse(ms_CONS_C1_SEP_2020_women_imputed$tipo_de_programa_2=="Women specific",1,0)
}
if(dimnames(table(ms2_CONS_C1_SEP_2020_women_imputed$tipo_de_programa_2))[[1]][1]=="General population"){
ms2_CONS_C1_SEP_2020_women_imputed$tipo_de_programa_2<-ifelse(ms2_CONS_C1_SEP_2020_women_imputed$tipo_de_programa_2=="Women specific",1,0)
}
# ’expand.covs()’ permits to define the set of covariates which impacts
# each transition:
#NO SE SI CONVIENE TRAER ESTA FUNCION
#data_mstate <- expand.covs(ms2_CONS_C1_SEP_2020_women_imputed, names(CONS_C1_df_dup_SEP_2020_women_miss_after_imp_conservados_msprep)[-c(1:6)], append = TRUE, longnames = FALSE)
plots<- data.frame(title=rep(c("(a) Admission -> Ther. Disch.","(b) Admission -> Readmission","(c) Ther. Disch. -> Readmission"),1),trans=rep(1:3,1))
## SIN COVARIABLES
layout(matrix(1:6, nc = 2, byrow = F))
for(i in c(1:3)){
plot(log(survfit(Surv(time,status)~1, data=subset(ms_CONS_C1_SEP_2020_women_imputed, trans==plots[i,"trans"] & tipo_de_programa_2==0))$time),
log(-log(survfit(Surv(time,status)~1, data=subset(ms_CONS_C1_SEP_2020_women_imputed, trans==plots[i,"trans"] & tipo_de_programa_2==0))$surv)), type="l",
xlab="log(Years)", ylab="", xaxs="i",yaxs="i",
las=1,cex.lab=1.5, cex.axis=1.5)
lines(log(survfit(Surv(time,status)~1, data=subset(ms_CONS_C1_SEP_2020_women_imputed, trans==plots[i,"trans"] & tipo_de_programa_2==1))$time),
log(-log(survfit(Surv(time,status)~1, data=subset(ms_CONS_C1_SEP_2020_women_imputed, trans==plots[i,"trans"] & tipo_de_programa_2==1))$surv)), lty=2)
legend(0,-4, c("GP", "WE"), bty="n", lty=c(2,1), cex=1.5)
title(main=paste0("LOG CUMULATIVE HAZARD VS LOG TIME PLOT \n",plots[i,"title"]), cex.main=1.5)
}
for(i in c(1:3)){
plot(survfit(Surv(time,status)~1, data=subset(ms_CONS_C1_SEP_2020_women_imputed, trans==plots[i,"trans"] & tipo_de_programa_2==0))$time,
-log(survfit(Surv(time,status)~1, data=subset(ms_CONS_C1_SEP_2020_women_imputed, trans==plots[i,"trans"] & tipo_de_programa_2==0))$surv), type="l",
xlab="Years", ylab="", xaxs="i",yaxs="i",
las=1,cex.lab=1.5, cex.axis=1.5, col=1)
lines(survfit(Surv(time,status)~1, data=subset(ms_CONS_C1_SEP_2020_women_imputed, trans==plots[i,"trans"] & tipo_de_programa_2==1))$time,
-log(survfit(Surv(time,status)~1, data=subset(ms_CONS_C1_SEP_2020_women_imputed, trans==plots[i,"trans"] & tipo_de_programa_2==1))$surv), lty=2)
legend(6,.1, c("GP", "WE"), bty="n", lty=c(2,1), cex=1.5)
title(main=paste0("CUMULATIVE HAZARD PLOT: -LOG(KM SURVIVAL) \n",plots[i,"title"]), cex.main=1.5)
}
As seen n the Figure above, the cumulative hazards does not follow a proportional trend in the transitions of (b) Admission → Readmission, and (c) Ther. Disch. → Readmission between General population or Women-specific programs.
plots2<- data.frame(title=rep(c("(a) Admission -> Ther. Disch.","(b) Admission -> Discharge w/o Med. Adv.","(c) Admission -> Readmission", "(d) Ther. Disch. -> Readmission","(e) Discharge w/o Med. Adv. -> Readmission"),1),trans=1:5)
layout(matrix(1:10, nc = 2, byrow = F))
for(i in c(1:5)){
plot(log(survfit(Surv(time,status)~1, data=subset(ms2_CONS_C1_SEP_2020_women_imputed, trans==plots2[i,"trans"] & tipo_de_programa_2==0))$time),
log(-log(survfit(Surv(time,status)~1, data=subset(ms2_CONS_C1_SEP_2020_women_imputed, trans==plots2[i,"trans"] & tipo_de_programa_2==0))$surv)), type="l",
xlab="log(Years)", ylab="", xaxs="i",yaxs="i",
las=1,cex.lab=1.5, cex.axis=1.5)
lines(log(survfit(Surv(time,status)~1, data=subset(ms2_CONS_C1_SEP_2020_women_imputed, trans==plots2[i,"trans"] & tipo_de_programa_2==1))$time),
log(-log(survfit(Surv(time,status)~1, data=subset(ms2_CONS_C1_SEP_2020_women_imputed, trans==plots2[i,"trans"] & tipo_de_programa_2==1))$surv)), lty=2)
legend(0,-4, c("GP", "WE"), bty="n", lty=c(2,1), cex=1.5)
title(main=paste0("LOG CUMULATIVE HAZARD VS LOG TIME PLOT \n",plots2[i,"title"]), cex.main=1.5)
}
for(i in c(1:5)){
plot(survfit(Surv(time,status)~1, data=subset(ms2_CONS_C1_SEP_2020_women_imputed, trans==plots2[i,"trans"] & tipo_de_programa_2==0))$time,
-log(survfit(Surv(time,status)~1, data=subset(ms2_CONS_C1_SEP_2020_women_imputed, trans==plots2[i,"trans"] & tipo_de_programa_2==0))$surv), type="l",
xlab="Years", ylab="", xaxs="i",yaxs="i",
las=1,cex.lab=1.5, cex.axis=1.5, col=1)
lines(survfit(Surv(time,status)~1, data=subset(ms2_CONS_C1_SEP_2020_women_imputed, trans==plots2[i,"trans"] & tipo_de_programa_2==1))$time,
-log(survfit(Surv(time,status)~1, data=subset(ms2_CONS_C1_SEP_2020_women_imputed, trans==plots2[i,"trans"] & tipo_de_programa_2==1))$surv), lty=2)
legend(7.5,.23, c("GP", "WE"), bty="n", lty=c(2,1), cex=1.5)
title(main=paste0("CUMULATIVE HAZARD PLOT: -LOG(KM SURVIVAL) \n",plots2[i,"title"]), cex.main=1.5)
}
As seen in the figures above, the transition (b) Admission → Discharge w/o Med. Adv., did not show a proportional trajectory in time between women in the General-population and Women-specific programs in the clog-log plot. On the other part, the transitions (c) Admission → Readmission, (d) Ther. Disch. → Readmission, (e) Discharge w/o Med. Adv. → Readmission did not follow proportional cumulative hazards between women in the General-Population and Women-Specific programs.
#The Coxproportional hazards model can still be used, but the interpretation of the results is different. Thiswill be outlined in some detail in Section 3
#The effect of covariates on disease progression is most often modelled using the Cox proportionalhazards model.
#https://onlinelibrary.wiley.com/doi/epdf/10.1002/sim.2712
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#If the SR plot for a given variable shows deviation from a straight line while it stays flat for the rest of the variables, then it is something you shouldn't ignore. First thing you can do is to look at the results of the global test. The global test might indicate the overall assumption of PH holds true [or not]. If the global test is fine then switching the reference category of the variable for which the assumption didn't held true, you might be able to achieve PH. The hazards may be proportional when compared to one reference category but not the other. Hence, by switching the reference categories, you might be able to find the category which results in PH assumption being true.
#If the switching doesn't solve your problem, and assuming you have got the right variables in your model, then this indicates that the hazards is not proportional for this particular variable i.e. different hazards at different time points. Hence, you may want to introduce interaction between variable and time in your model.
#_________
#https://stats.stackexchange.com/questions/33615/schoenfeld-residuals?rq=1
#Judgement of proportional hazards(PH) should be based on the results from a formal statistical test and the Schoenfeld residuals (SR) plot together.
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#The best cures for some problems--running an experiment longer or doing more aggressive follow-up to avoid a large proportion of censored values, or using a large enough sample size to lessen the problems of small sample sizes--are outside the scope of statistical analysis per se.
#Alternative procedures:
#Stratification: Dividing the sample into homogeneous subsamples
#Parametric methods: Using methods that assume a specific survival distribution
#Choosing a particular nonparametric test: Situations favoring one of Mantel-Cox, Gehan-Breslow, or Tarone-Ware over the others
#Other nonparametric tests
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#Is it possible that the analysis could be valid despite the non-proportional hazards?
#As others have mentioned, large sample size may be a factor leading to the violation of the statistical test of the PH assumption. As @EdM mentioned, it will depend on your context/knowledge of the subject as to whether the deviations matter From page 1461 of this paper
#When the sample size is small, this method may lack power to detect deviations from PH; while for large sample sizes, hypothesis tests may be over sensitive to slight deviations from this assumption.
#The answer to this question suggests that the effect of fitting a Cox model with non-proportional hazards is "slightly less power" which can be recovered with robust standard errors, leading to the hazard ratios being interpreted as the time-weighted average of the hazard ratio
#This interesting paper (Why Test for Proportional Hazards?) was published recently highlighting that there are legitimate reasons to assume violation of PH, and that one of the effects is on the interpretation rather than invalidity of the results. They actually suggest that statistical testing of PH is unnecessary if it's expected that the hazard ratio varies over time. (I'd be interested to hear what others think of this paper)
#In the section "How should hazard ratios be interpreted?" on page 1402
#As a weighted average of the time-varying hazard ratios, the hazard ratio estimate from a Cox proportional hazards model is often used as a convenient summary of the treatment effect during the follow-up. However, a hazard ratio from a Cox model needs to be interpreted as a weighted average of the true hazard ratios over the entire follow-up period. The 95% confidence interval should be estimated using a valid method such as bootstrapping and also using inverse probability weighting to adjust for losses to follow-up.
#_________
#xtna (https://stats.stackexchange.com/users/307358/xtna), Violated non-proportional hazards - Cox regression model of time-dependent covariable, URL (version: 2021-01-15): https://stats.stackexchange.com/q/505055
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#The Cox proportional hazards model has traditionally been applied to assess the accuracy of prognostic models. However, it may be suboptimal due to the inflexibility to model the baseline survival function and when the proportional hazards assumption is violated
#_________
#Miladinovic B, Kumar A, Mhaskar R, Kim S, Schonwetter R, Djulbegovic B (2012) A Flexible Alternative to the Cox Proportional Hazards Model for Assessing the Prognostic Accuracy of Hospice Patient Survival. PLoS ONE 7(10): e47804. https://doi.org/10.1371/journal.pone.0047804
Decision whether to use Markov or Semi-Markov
For the three-states model, we only looked at transition 3 as only transition where different times entering the state. Interested in knowing the risk of readmission in completion of a treatment w/therapeutic discharge (status) from different treatments and different time to progression.
In the four-states model, we looked at transition 4 and 5.
#state arrival extended (semi-)Markov to mean that the i → j transition hazard depends on thetime of arrival at state i.
#Build a Cox proportional hazard model including treatment and time in previous state as covariates
CoxMarkov<-coxph(Surv(Tstart,Tstop,status)~factor(tipo_de_programa_2)+Tstart,
data=subset(ms_CONS_C1_SEP_2020_women_imputed, trans==3),method = "breslow")
## Warning in coxph(Surv(Tstart, Tstop, status) ~ factor(tipo_de_programa_2) + : a
## variable appears on both the left and right sides of the formula
HR<-round(exp(coef(CoxMarkov)),2)
CI<-round(exp(confint(CoxMarkov)),2)
P<-round(coef(summary(CoxMarkov))[,5],4)
colnames(CI)<-c("Lower 95% CI","Upper 95% CI")
#In Surv(Tstart, Tstop, status) : Stop time must be > start time, NA created
CoxMarkov2<-coxph(Surv(Tstart,Tstop,status)~factor(tipo_de_programa_2)+Tstart,
data=subset(ms2_CONS_C1_SEP_2020_women_imputed, trans %in% c(4,5)),method = "breslow")
## Warning in coxph(Surv(Tstart, Tstop, status) ~ factor(tipo_de_programa_2) + : a
## variable appears on both the left and right sides of the formula
HR2<-round(exp(coef(CoxMarkov2)),2)
CI2<-round(exp(confint(CoxMarkov2)),2)
P2<-round(coef(summary(CoxMarkov2))[,5],4)
tab_cox_markov<- as.data.frame(cbind(HR2,CI2,P2))
colnames(tab_cox_markov)<-c("HR","Lower 95% CI","Upper 95% CI", "P")
#Stop time must be > start time, NA created #INVESTIGATE - RESOLVED IN 2021-03-24
rbind.data.frame(as.data.frame(cbind(HR,CI,P)),tab_cox_markov) %>%
data.table(keep.rownames=T) %>%
dplyr::rename("Terms"="rn") %>%
dplyr::mutate(Terms=dplyr::case_when(grepl("tipo_de_", Terms)~"Type of Program",
grepl("Tstart",Terms)~"Time in previous state(in years)")) %>%
dplyr::mutate(P=ifelse(P==as.character("0"),"<.001",P)) %>%
dplyr::rename("Sig."="P") %>%
dplyr::mutate(`95% CIs`=paste0(`Lower 95% CI`,", ",`Upper 95% CI`)) %>%
dplyr::select(-`Lower 95% CI`,-`Upper 95% CI`) %>%
dplyr::select(Terms, HR, `95% CIs`, Sig.) %>%
knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
caption="Table 10. PH Model incluiding time in previous state & Type of Program as a covariate",
align= c("c",rep('c', 5)))%>%
kableExtra::pack_rows("Three-states", 1, 2) %>%
kableExtra::pack_rows("Four-states", 3, 4) %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>%
kableExtra::scroll_box(width = "100%", height = "350px")
Table 10. PH Model incluiding time in previous state & Type of Program as a covariate
|
Terms
|
HR
|
95% CIs
|
Sig.
|
|
Three-states
|
|
Type of Program
|
1.85
|
1.64, 2.09
|
<.001
|
|
Time in previous state(in years)
|
1.55
|
1.37, 1.76
|
<.001
|
|
Four-states
|
|
Type of Program
|
1.47
|
1.39, 1.56
|
<.001
|
|
Time in previous state(in years)
|
1.56
|
1.47, 1.66
|
<.001
|
#a variable appears on both the left and right sides of the formula
#this warning should be normal, since we are dealing with time to arrival at a determined state.
The model considered the transition from intermediate states to our absorbing state (being readmitted) is explained by the time spent in the previous health state. This covariate (time in the previous state) was shown to be statistically significant (p<.001); results indicated a longer duration spent in the first treatment is associated with increased risk of readmission. Therefore, a semi-Markov (called a Markov renewal model) or clock reset approach should be undertaken for both models. They bear the advantage that information from the process history can be included as transition-specific explanatory covariates.
#Since prtime only makes sense for transition 3 (PR → RelDeath), we need the transition-specific covariate
#of prtime for transition 3, which is prtime.3. The corresponding model is termed the ”state
#arrival extended Markov PH” model in the tutorial, and appears on the right of Table III.
ms_CONS_C1_SEP_2020_women_imputed$arrival<-ms_CONS_C1_SEP_2020_women_imputed$Tstart
ms2_CONS_C1_SEP_2020_women_imputed$arrival<-ms2_CONS_C1_SEP_2020_women_imputed$Tstart
ms_CONS_C1_SEP_2020_women_imputed_exp <- expand.covs(ms_CONS_C1_SEP_2020_women_imputed, "arrival", append = TRUE, longnames = FALSE)
ms2_CONS_C1_SEP_2020_women_imputed_exp <- expand.covs(ms2_CONS_C1_SEP_2020_women_imputed, "arrival", append = TRUE, longnames = FALSE)
Assessment of Fit of Transitions
We need to derive appropriate functional forms and define respective survival functions. One reason to favor patametric models is that they can be easier to generalize. Seven candidate distributions were considered to model transitions from Adm→Ther.Disch., Adm.→Readm. and Ther.Disch.→Readm., including Weibull (assume a monotonically increasing or decreasing hazard), Log-logistic (non-monotonic hazards), Gompertz (assume a monotonically increasing or decreasing hazard), Log-normal (non-monotonic hazards), Exponential (constant hazard), Gamma & Generalized gamma (more flexible).
The following plots fitted survival curves from each model (coloured lines), with the Kaplan-Meier estimate, in black, obtained from an example of Jackson available here, added to the contributions of Wathers & Cutler available here.
options(warn=-1)
n_iter<-10000
tiempo_antes_fits<-Sys.time()
#Weathers, Brandon and Cutler, Richard Dr., "Comparision of Survival Curves Between Cox Proportional
#Hazards, Random Forests, and Conditional Inference Forests in Survival Analysis" (2017). All Graduate
#Plan B and other Reports. 927.
#https://digitalcommons.usu.edu/gradreports/927
#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:350px; overflow-x: scroll; width:100%">
#https://devinincerti.com/2019/01/01/sim-mstate.html
n_trans <- max(mat_3_states, na.rm = TRUE)
fits_wei <- vector(mode = "list", length = n_trans)
fits_weiph <- vector(mode = "list", length = n_trans)
fits_llogis <- vector(mode = "list", length = n_trans)
fits_gomp <- vector(mode = "list", length = n_trans)
fits_logn <- vector(mode = "list", length = n_trans)
fits_exp <- vector(mode = "list", length = n_trans)
fits_gam <- vector(mode = "list", length = n_trans)
fits_ggam <- vector(mode = "list", length = n_trans)
fits_c_wei <- vector(mode = "list", length = n_trans)
fits_c_weiph <- vector(mode = "list", length = n_trans)
fits_c_llogis <- vector(mode = "list", length = n_trans)
fits_c_gomp <- vector(mode = "list", length = n_trans)
fits_c_logn <- vector(mode = "list", length = n_trans)
fits_c_exp <- vector(mode = "list", length = n_trans)
fits_c_gam <- vector(mode = "list", length = n_trans)
fits_c_ggam <- vector(mode = "list", length = n_trans)
km.lc<-list()
fits_cox<-list()
fits_c_cox<-list()
#"gengamma" Generalized gamma (stable parameterisation)
#"gengamma.orig" Generalized gamma (original parameterisation)
#"genf" Generalized F (stable parameterisation)
#"genf.orig" Generalized F (original parameterisation)
#"weibull" Weibull
#"gamma" Gamma
#"exp" Exponential
#"lnorm" Log-normal
#"gompertz" Gompertz
library(flexsurv)
#Specify the formula
fitform <- Surv(time, status) ~ 1
for (i in 1:n_trans){
fits_wei[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "weibull")
}
for (i in 1:n_trans){
fits_weiph[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "weibullph")
}
for (i in 1:n_trans){
fits_llogis[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "llogis")
}
for (i in 1:n_trans){
fits_gam[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gamma")
}
#LOS ERRORES OCURREN CUANDO NO HAY COVARIABLES
#In (function (q, shape, rate = 1, scale = 1/rate, lower.tail = TRUE, ... : NaNs produced
#gamma no funcionó: NaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs produced
for (i in 1:n_trans){
fits_ggam[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gengamma")
}
for (i in 1:n_trans){
fits_gomp[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gompertz")
}
for (i in 1:n_trans){
fits_logn[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "lnorm")
}
for (i in 1:n_trans){
fits_exp[[i]] <- flexsurvreg(formula=fitform,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "exp")
}
for (i in 1:n_trans){
fits_cox[[i]] <- coxph(fitform, data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i))
}
#_#_#_#_#_#_#_#_#_#_#_#_#_
#covariates
#Specify the formula
fitform2 <- Surv(time, status) ~ edad_al_ing_grupos+ escolaridad_rec+ sus_principal_mod+
freq_cons_sus_prin+ compromiso_biopsicosocial+ tenencia_de_la_vivienda_mod+
num_otras_sus_mod+ numero_de_hijos_mod_rec+ factor(tipo_de_programa_2)+ tipo_de_plan_res
for (i in 1:n_trans){
fits_c_wei[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "weibull")
}
for (i in 1:n_trans){
fits_c_weiph[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "weibullph")
}
for (i in 1:n_trans){
fits_c_llogis[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "llogis")
}
for (i in 1:n_trans){
fits_c_gam[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gamma")
}
#LOS ERRORES OCURREN CUANDO NO HAY COVARIABLES
#In (function (q, shape, rate = 1, scale = 1/rate, lower.tail = TRUE, ... : NaNs produced
#gamma no funcionó: NaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs produced
for (i in 1:n_trans){
fits_c_ggam[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gengamma")
}
for (i in 1:n_trans){
fits_c_gomp[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "gompertz")
}
for (i in 1:n_trans){
fits_c_logn[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "lnorm")
}
for (i in 1:n_trans){
fits_c_exp[[i]] <- flexsurvreg(formula=fitform2,
data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i),
dist = "exp")
}
for (i in 1:n_trans){
fits_c_cox[[i]] <- coxph(fitform2, data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i))
}
for (i in 1:n_trans){
km.lc[[i]] <- survfit(formula= fitform, data = subset(ms_CONS_C1_SEP_2020_women_imputed_exp, trans == i), conf.type = "log-log")
}
endTime <- Sys.time()
paste0("Time in process: ");endTime - tiempo_antes_fits
## [1] "Time in process: "
## Time difference of 12.30707 mins
transition_label<- c(`1`="Transition 1: Admission to Ther.Discharge",`2`="Transition 2: Admission to Readmission",`3`="Transition 3: Ther.Discharge to Readmission")
startTime <- Sys.time()
layout(matrix(1:3, nc = 1, byrow = F))
for (i in 1:n_trans){
plot(km.lc[[i]], col="black", xlim=c(1,12),ylim=c(.4,1), conf.int=F); lines(fits_wei[[i]], col="navyblue", ci=F);lines(fits_weiph[[i]], col="navyblue", ci=F);lines(fits_gomp[[i]], col="#B89673", ci=F);lines(fits_llogis[[i]], col="#A0A36D", ci=F);lines(fits_gam[[i]], col="#886894", ci=F);lines(fits_ggam[[i]], col="darkorchid4", ci=F);lines(fits_logn[[i]], col="#496A72", ci=F);lines(fits_logn[[i]], col="gray70", ci=F);lines(fits_exp[[i]],col="#496A72", ci=F)
legend("bottomleft", legend = c("Kaplan-Meier","Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential"), col =
c("black","navyblue","#B89673","#A0A36D","#886894",
"darkorchid4","#496A72","gray70","#496A72"),
title = "Distributions", cex = .95, bty = "n", lty=1)# lty = 1:2,
title(main=transition_label[[i]])
}
endTime <- Sys.time()
paste0("Time in process: ");endTime - startTime
## [1] "Time in process: "
## Time difference of 0.2931552 secs
#23 min aprox.
#For more complicated models, users should specify what covariate values they want summaries for, rather than relying on the default
#</div>
options(warn=0)
Additionally, we compared the hazards of these distributions with non-parametric techniques, using a smoothed hazard function from right-censored data using kernel-based methods (as Incerti shows in his blog). The confidence interval around the predicted hazard function is estimated using bootstrapping methods of 10^{4} iterations. Confidence intervals are obtained by sampling randomly from the asymptotic normal distribution of the maximum likelihood estimates and then taking quantiles (see, e.g. Mandel (2013)).
#Micha Mandel (2013) Simulation-Based Confidence Intervals for Functions With Complicated Derivatives, The American Statistician, 67:2, 76-81, DOI: 10.1080/00031305.2013.783880
# https://rpubs.com/martina_morris/testhazard
tiempo_antes_fits2<-Sys.time()
newtime2 = seq(from=1/24, to= 15, by=1/12)
#```{r fit,eval=T, echo=T, paged.print=TRUE, fig.height=13, fig.width=10, fig.align="center", results = 'asis'}
kernel_haz_est<-list()
kernel_haz<-list()
for (i in 1:n_trans){
library("muhaz")
kernel_haz_est[[i]] <- muhaz(ms_CONS_C1_SEP_2020_women_imputed_exp[which(ms_CONS_C1_SEP_2020_women_imputed_exp$trans==i),"time"],
ms_CONS_C1_SEP_2020_women_imputed_exp[which(ms_CONS_C1_SEP_2020_women_imputed_exp$trans==i),"status"])
kernel_haz[[i]] <- data.table(time = kernel_haz_est[[i]]$est.grid,
est = kernel_haz_est[[i]]$haz.est,
dist = "Kernel density")
}
haz_int_only<-
cbind(trans=rep(1:n_trans,each=nrow(kernel_haz[[i]])),
rbindlist(kernel_haz))
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#create list of survreg for different transitions
#<- flexsurvreg_list(fit1, fit2, fit3)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
dists_wo_covs <- cbind.data.frame(covs=c(rep("fits_",8)),
formal=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential"),1),
model=rep(c("wei", "weiph", "gomp", "llogis", "gam","ggam", "logn", "exp"),1))
dists_w_covs <- cbind.data.frame(covs=c(rep("fits_c_",8)),
formal=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential"),1),
model=rep(c("wei", "weiph", "gomp", "llogis", "gam","ggam", "logn", "exp"),1))
#hr.exp <- round(exp(coef(get(paste0("fits_",dists[x,"model"]))[[y]])["groupGood"]),3)
# Database to contrast adjustments
newdat2 <- data.table::data.table(tipo_de_programa_2= factor(c(rep(1,1),rep(0,1))),
#comp_status= factor(rep(c("Therapeutic discharge","Discharge without clinical advice"),2)),
edad_al_ing_grupos= factor(rep("50+",2)),
escolaridad_rec= factor(rep("1-More than high school",2)),
sus_principal_mod= factor(rep("Marijuana",2)),
freq_cons_sus_prin= factor(rep("2 to 3 days a week",2)),
compromiso_biopsicosocial= factor(rep("1-Mild",2)),
tenencia_de_la_vivienda_mod= factor(rep("Owner/Transferred dwellings/Pays Dividends",2)),
num_otras_sus_mod= factor(rep("No additional substance",2)),
numero_de_hijos_mod_rec= factor(rep("No",2)),
tipo_de_plan_res= factor(rep("Outpatient",2))
)
#WO COVS
fitted_flexsurvreg2a<-data.frame()
fit_flexsurvreg2a<-data.frame()
for (y in 1:n_trans){
for (x in 1:nrow(dists_wo_covs)){
cat(paste0("#### Flexible Survival Model (w/o covs): ",
dists_wo_covs[x,"formal"], "; transition: ",y, "\n \n"))
#Return fitted survival, cumulative hazard or hazard at a series of times from a fitted model
#
mod_flexsurv<-paste0(dists_wo_covs[x,"covs"],dists_wo_covs[x,"model"])
#FITTED LINES
#Lines
est_by_time<-
data.table::data.table(summary(get(mod_flexsurv)[[y]], ci = F, t= newtime2, B=10000,type = "hazard", tidy=T))
#dataframe
fitted_flexsurvreg2a<-rbind.data.frame(fitted_flexsurvreg2a,
cbind.data.frame(dist= rep(dists_wo_covs[x,"formal"],),
trans=rep(y,),
est_by_time))
#
fit_flexsurvreg2a<-rbind(fit_flexsurvreg2a,
cbind(dist= dists_wo_covs[x,"formal"], trans=y,
covs="w/o covs",
AIC= get(mod_flexsurv)[[y]]$AIC,
Llik= get(mod_flexsurv)[[y]]$loglik,
npars= get(mod_flexsurv)[[y]]$npars,
pars= get(mod_flexsurv)[[y]]$AIC/2 + get(mod_flexsurv)[[y]]$loglik,
BIC= get(mod_flexsurv)[[y]]$loglik+ log(get(mod_flexsurv)[[y]]$N)* (get(mod_flexsurv)[[y]]$AIC/2 + get(mod_flexsurv)[[y]]$loglik)
)
)
}
}
## #### Flexible Survival Model (w/o covs): Weibull (AFT); transition: 1
##
## #### Flexible Survival Model (w/o covs): Weibull (PH); transition: 1
##
## #### Flexible Survival Model (w/o covs): Gompertz; transition: 1
##
## #### Flexible Survival Model (w/o covs): Log-logistic; transition: 1
##
## #### Flexible Survival Model (w/o covs): Gamma; transition: 1
##
## #### Flexible Survival Model (w/o covs): Generalized gamma; transition: 1
##
## #### Flexible Survival Model (w/o covs): Lognormal; transition: 1
##
## #### Flexible Survival Model (w/o covs): Exponential; transition: 1
##
## #### Flexible Survival Model (w/o covs): Weibull (AFT); transition: 2
##
## #### Flexible Survival Model (w/o covs): Weibull (PH); transition: 2
##
## #### Flexible Survival Model (w/o covs): Gompertz; transition: 2
##
## #### Flexible Survival Model (w/o covs): Log-logistic; transition: 2
##
## #### Flexible Survival Model (w/o covs): Gamma; transition: 2
##
## #### Flexible Survival Model (w/o covs): Generalized gamma; transition: 2
##
## #### Flexible Survival Model (w/o covs): Lognormal; transition: 2
##
## #### Flexible Survival Model (w/o covs): Exponential; transition: 2
##
## #### Flexible Survival Model (w/o covs): Weibull (AFT); transition: 3
##
## #### Flexible Survival Model (w/o covs): Weibull (PH); transition: 3
##
## #### Flexible Survival Model (w/o covs): Gompertz; transition: 3
##
## #### Flexible Survival Model (w/o covs): Log-logistic; transition: 3
##
## #### Flexible Survival Model (w/o covs): Gamma; transition: 3
##
## #### Flexible Survival Model (w/o covs): Generalized gamma; transition: 3
##
## #### Flexible Survival Model (w/o covs): Lognormal; transition: 3
##
## #### Flexible Survival Model (w/o covs): Exponential; transition: 3
##
haz <- rbind(haz_int_only, fitted_flexsurvreg2a)
haz_plot_int_only<-
haz %>%
dplyr::mutate(dist=factor(dist,levels=c("Kernel density",dists_wo_covs$formal))) %>%
ggplot()+
geom_line(aes(time, est, color=dist),size=1)+
#geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.5, linetype=0) +
scale_color_manual(name="Distributions", values = c("black",brewer.pal(n = 9, name = 'Paired')))+
#c("#112A60","#085754","#D3A347","#4F3C91","red","#112A60","#085754","#8F630D","#251363")) +
facet_wrap(~trans,labeller = labeller(trans = transition_label))+
sjPlot::theme_sjplot2()+
theme(legend.position="bottom")+
#theme(axis.text.x = element_blank(),
# panel.grid.major = element_blank(),
# panel.grid.minor = element_blank()) +
labs(y="Hazard",x="Time (years)")
fit_plot_int_only<-
fit_flexsurvreg2a %>%
melt(id.vars=c("dist","trans","covs","npars","pars")) %>%
dplyr::filter(variable!="Llik") %>%
dplyr::mutate(dist=factor(dist, levels= dists_wo_covs$formal),
value=as.numeric(value)) %>%
ggplot(aes(dist, value, fill=variable))+
geom_bar(stat= "identity")+
#geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.5, linetype=0) +
scale_fill_manual(name="Distributions", values = c("gray30","gray70"))+
#c("#112A60","#085754","#D3A347","#4F3C91","red","#112A60","#085754","#8F630D","#251363")) +
facet_wrap(~trans,labeller = labeller(trans = transition_label))+
sjPlot::theme_sjplot2()+
theme(legend.position="bottom",
axis.text.x= element_text(angle=45, vjust=0.5))+
#theme(axis.text.x = element_blank(),
# panel.grid.major = element_blank(),
# panel.grid.minor = element_blank())+
labs(y="Value",x="Distribution")
grid.arrange(haz_plot_int_only,fit_plot_int_only, heights=c(2,1))
if(no_mostrar==1){
jpeg("C:/Users/andre/Desktop/SUD_CL/eso3.jpg", height=15, width= 10, res= 96, units = "in")
grid.arrange(haz_plot_int_only,fit_plot_int_only, heights=c(2,1))
dev.off()
}
Visual assessments seems to show that for the first 2 transitions, the Gompertz distribution may reproduce more adequately the extrapolation of hazards posterior to 5 years, and Log-normal may reproduce adequately the third transition. However, Gompertz, Generalized gamma and Log-logistic show lower AICs for each transitions. Must consider that differences between distributions are really low.
options(warn=-1)
kernel_haz_est2a<-list()
kernel_haz_est2b<-list()
kernel_haz2a<-list()
kernel_haz2b<-list()
for (i in 1:n_trans){
library("muhaz")
kernel_haz_est2a[[i]] <- muhaz(ms_CONS_C1_SEP_2020_women_imputed_exp[which(ms_CONS_C1_SEP_2020_women_imputed_exp$trans==i &
ms_CONS_C1_SEP_2020_women_imputed_exp$tipo_de_programa_2==1),"time"],
ms_CONS_C1_SEP_2020_women_imputed_exp[which(ms_CONS_C1_SEP_2020_women_imputed_exp$trans==i &
ms_CONS_C1_SEP_2020_women_imputed_exp$tipo_de_programa_2==1),"status"])
kernel_haz2a[[i]] <- data.table(time = kernel_haz_est2a[[i]]$est.grid,
est = kernel_haz_est2a[[i]]$haz.est,
dist = "Kernel density")
kernel_haz_est2b[[i]] <- muhaz(ms_CONS_C1_SEP_2020_women_imputed_exp[which(ms_CONS_C1_SEP_2020_women_imputed_exp$trans==i &
ms_CONS_C1_SEP_2020_women_imputed_exp$tipo_de_programa_2==0),"time"],
ms_CONS_C1_SEP_2020_women_imputed_exp[which(ms_CONS_C1_SEP_2020_women_imputed_exp$trans==i &
ms_CONS_C1_SEP_2020_women_imputed_exp$tipo_de_programa_2==0),"status"])
kernel_haz2b[[i]] <- data.table(time = kernel_haz_est2b[[i]]$est.grid,
est = kernel_haz_est2b[[i]]$haz.est,
dist = "Kernel density")
}
haz_int_only2<-
rbind(cbind(trans=rep(1:n_trans,each=nrow(kernel_haz2a[[i]])),
tipo_programa=rep(1,nrow(kernel_haz2a[[i]])),
rbindlist(kernel_haz2a)),
cbind(trans=rep(1:n_trans,each=nrow(kernel_haz2b[[i]])),
tipo_programa=rep(0,nrow(kernel_haz2b[[i]])),
rbindlist(kernel_haz2b)))
## W COVS
fitted_flexsurvreg2b<-data.frame()
fit_flexsurvreg2b<-data.frame()
for (y in 1:n_trans){
for (x in 1:nrow(dists_w_covs)){
cat(paste0("#### Flexible Survival Model (w/ covs): ",
dists_w_covs[x,"formal"], "; transition: ",y, "\n \n"))
#Return fitted survival, cumulative hazard or hazard at a series of times from a fitted model
#
mod_flexsurv<-paste0(dists_w_covs[x,"covs"],dists_w_covs[x,"model"])
#FITTED LINES
#Lines
est_by_time<-
data.table::data.table(summary(get(mod_flexsurv)[[y]], newdata= newdat2, t= newtime2, B=10000,type = "hazard", tidy=T))
#"survival" for survival probabilities.
#"cumhaz" for cumulative hazards.
#"hazard" for hazards.
#"rmst" for restricted mean survival.
#"mean" for mean survival.
#"median" for median survival (alternative to type="quantile" with quantiles=0.5).
#"quantile" for quantiles of the survival time distribution.
#"link" for the fitted value of the location parameter (i.e. the "linear predictor")
#dataframe
fitted_flexsurvreg2b<-rbind.data.frame(fitted_flexsurvreg2b,
cbind.data.frame(dist= rep(dists_w_covs[x,"formal"],),
hr= round(exp(coef(get(mod_flexsurv)[[y]])["factor(tipo_de_programa_2)1"]),3),
trans=rep(y,),
est_by_time))
#
fit_flexsurvreg2b<-rbind(fit_flexsurvreg2b,
cbind(dist= dists_w_covs[x,"formal"],
trans=y,
covs="w/ covs",
AIC= get(mod_flexsurv)[[y]]$AIC,
Llik= get(mod_flexsurv)[[y]]$loglik,
npars= get(mod_flexsurv)[[y]]$npars,
pars= get(mod_flexsurv)[[y]]$AIC/2 + get(mod_flexsurv)[[y]]$loglik,
BIC= get(mod_flexsurv)[[y]]$loglik+ log(get(mod_flexsurv)[[y]]$N)* (get(mod_flexsurv)[[y]]$AIC/2 + get(mod_flexsurv)[[y]]$loglik)
)
)
}
}
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 1
##
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 1
##
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 1
##
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 1
##
## #### Flexible Survival Model (w/ covs): Gamma; transition: 1
##
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 1
##
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 1
##
## #### Flexible Survival Model (w/ covs): Exponential; transition: 1
##
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 2
##
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 2
##
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 2
##
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 2
##
## #### Flexible Survival Model (w/ covs): Gamma; transition: 2
##
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 2
##
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 2
##
## #### Flexible Survival Model (w/ covs): Exponential; transition: 2
##
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 3
##
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 3
##
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 3
##
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 3
##
## #### Flexible Survival Model (w/ covs): Gamma; transition: 3
##
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 3
##
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 3
##
## #### Flexible Survival Model (w/ covs): Exponential; transition: 3
##
tipo_programa_label<- c(`0`="General Population",`1`="Women specific")
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#Calculate error
haz_int_only2_binned<-
haz_int_only2 %>%
dplyr::mutate(time=ifelse(time<.5,.5,time)) %>%
dplyr::group_by(trans, tipo_programa) %>%
dplyr::mutate(x_binned = cut(time,
breaks= seq(0,max(haz_int_only2$time),.5))) %>%
dplyr::ungroup() %>%
dplyr::group_by(trans, tipo_programa, dist, x_binned) %>%
dplyr::summarise(mean_time=mean(time,na.rm=T),mean_est=mean(est,na.rm=T)) %>%
dplyr::ungroup() %>%
dplyr::rename("tipo_de_programa_2"="tipo_programa") %>%
dplyr::mutate(tipo_de_programa_2=factor(tipo_de_programa_2))%>%
dplyr::select(-dist)
## `summarise()` regrouping output by 'trans', 'tipo_programa', 'dist' (override with `.groups` argument)
fitted_flexsurvreg2b_binned<-
fitted_flexsurvreg2b[,c("dist","trans","time","est","factor(tipo_de_programa_2)")] %>%
dplyr::rename("tipo_de_programa_2"="factor(tipo_de_programa_2)") %>%
dplyr::filter(time<=max(haz_int_only2$time)) %>%
dplyr::mutate(time=ifelse(time<.5,.5,time)) %>%
dplyr::group_by(trans, dist, tipo_de_programa_2) %>%
dplyr::mutate(x_binned = cut(time,
breaks= seq(0,max(haz_int_only2$time),.5))) %>%
dplyr::ungroup() %>%
dplyr::group_by(trans, dist, tipo_de_programa_2, x_binned) %>%
dplyr::summarise(mean_time=mean(time,na.rm=T),mean_est=mean(est,na.rm=T)) %>%
dplyr::ungroup()
## `summarise()` regrouping output by 'trans', 'dist', 'tipo_de_programa_2' (override with `.groups` argument)
fitted_flexsurvreg2b_binned_mix<-
fitted_flexsurvreg2b_binned %>%
dplyr::left_join(haz_int_only2_binned, by=c("trans","tipo_de_programa_2", "x_binned")) %>%
dplyr::select(-mean_time.y,-mean_time.x) %>%
dplyr::rename("mean_est_flexsurv"="mean_est.x","mean_est_cumhaz"="mean_est.y")
db_for_apply_rmse<-
cbind.data.frame(tipo_prog=rep(c("0","1"),each=8,3),
dist=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
"Generalized gamma", "Lognormal", "Exponential"),2*3),
trans=rep(c(1:3),each=8*2))
rmse_comp_fits_2b<- data.frame()
for(i in 1:nrow(db_for_apply_rmse)){
rmse<- Metrics::rmse(subset(fitted_flexsurvreg2b_binned_mix[complete.cases(fitted_flexsurvreg2b_binned_mix),],
tipo_de_programa_2==db_for_apply_rmse[i,"tipo_prog"] &
dist==db_for_apply_rmse[i,"dist"] &
trans==db_for_apply_rmse[i,"trans"])$mean_est_flexsurv,
subset(fitted_flexsurvreg2b_binned_mix[complete.cases(fitted_flexsurvreg2b_binned_mix),],
tipo_de_programa_2==db_for_apply_rmse[i,"tipo_prog"] &
dist==db_for_apply_rmse[i,"dist"] &
trans==db_for_apply_rmse[i,"trans"])$mean_est_cumhaz)
rmse_comp_fits_2b<- rbind(rmse_comp_fits_2b,cbind(dist=db_for_apply_rmse[i,"dist"],
tipo_prog=db_for_apply_rmse[i,"tipo_prog"],
trans=db_for_apply_rmse[i,"trans"],
rmse=rmse))
}
rmse_comp_fits_2b<-
rmse_comp_fits_2b %>%
tidyr::pivot_wider(names_from = tipo_prog, values_from = rmse) %>%
dplyr::rename("gp"="0","we"="1") %>%
dplyr::mutate(gp=as.numeric(gp),we=as.numeric(we)) %>%
dplyr::mutate(mean_rmse=rowSums(.[3:4])/2) %>%
dplyr::arrange(trans,dist, mean_rmse)#%>%
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#http://colorschemedesigner.com/csd-3.5/#
#https://www.r-graph-gallery.com/42-colors-names.html
plot_fitted_flexsurvreg2b<-
fitted_flexsurvreg2b[,c("dist","trans","time","est","lcl","ucl","factor(tipo_de_programa_2)")] %>%
rbind(cbind(dplyr::rename(haz_int_only2, "factor(tipo_de_programa_2)"="tipo_programa"),"lcl"=1,"ucl"=1)) %>%
dplyr::rename("tipo_de_programa_2"="factor(tipo_de_programa_2)") %>%
dplyr::mutate(tipo_de_programa_2=factor(tipo_de_programa_2),
dist=factor(dist, levels=c("Kernel density",dists_w_covs$formal)),
trans=factor(trans)) %>%
#dplyr::group_by(tipo_de_programa_2,dist,time,trans) %>% summarise(n=n()) %>%
ggplot()+
geom_line(aes(time, est, color=dist),size=1)+
geom_ribbon(aes(x = time, ymin = lcl, ymax = ucl, fill = dist), alpha = 0.2)+
#geom_ribbon(aes(ymin=lcl, ymax=ucl), alpha=.5, linetype=0) +
scale_color_manual(name="Distributions", values = c("red","gray20","darkolivegreen","#D3A347","#4F3C91","coral4","#085754","#8F630D","cornflowerblue")) +
scale_fill_manual(name="Distributions", values = c("red","gray20","darkolivegreen","#D3A347","#4F3C91","coral4","#085754","#8F630D","cornflowerblue")) +
facet_wrap(tipo_de_programa_2~trans,labeller = labeller(trans = transition_label, tipo_de_programa_2=tipo_programa_label))+
sjPlot::theme_sjplot2()+
ylim(0,.3)+
theme(legend.position="bottom")+
labs(y="Hazard",x="Time (years)",caption="Note. Kernel density, stratified by type of program")
plot_fitted_flexsurvreg2b
## http://www.statistica.it/gianluca/teaching/r-hta-workshop/2019/Bullement.pdf
tiempo_despues_fits2<-Sys.time()
paste0("Time in process: ");tiempo_despues_fits2-tiempo_antes_fits2
## [1] "Time in process: "
## Time difference of 1.873583 mins
#13 minutos aprox. en DELL
options(warn=0)
if(no_mostrar==1){
jpeg("C:/Users/andre/Desktop/SUD_CL/eso4.jpg", height=10, width= 10, res= 96, units = "in")
plot_fitted_flexsurvreg2b
dev.off()
}
There was very little to choose between the distributions, but we aimed to choose the distribution that achieve the best balance of a reasonable fit to the observed data and a sensible extrapolation. Additionally, for the first transitions. A Gompertz was chosen for the first transition (Mean RMSE= 0.0349; AIC= 3.14001^{4}), despite a Generalized gamma distribution had lower AIC (AIC= 3.262809^{4}). The distribution for the second transition was Generalized gamma (Mean RMSE= 0.0187; AIC= 3.079697^{4}). For the third transition, we decided to choose Generalized gamma because it shows a lower AIC (Mean RMSE= 0.0351; AIC= 7090.26), despite Log-logistic had lower indices (AIC= 3.378087^{4}).
Cumulative Hazard - Non-parametric vs. Parametric
if(no_mostrar==1){
for(i in c(1:3)){
print(survdiff(Surv(time,status==1)~tipo_de_programa_2,data=subset(ms_CONS_C1_SEP_2020_women_imputed, trans == i)))
}
for(i in c(1:5)){
print(survdiff(Surv(time,status==1)~tipo_de_programa_2,data=subset(ms2_CONS_C1_SEP_2020_women_imputed, trans == i)))
}
}
#We consider first the model without any proportionality assumption on the baseline hazards; this is achieved by adding strata(trans) to the formula, which estimates separate baseline hazards for different values of trans (the transitions).
pr0 <- subset(ms_CONS_C1_SEP_2020_women_imputed, tipo_de_programa_2==0)
pr1 <- subset(ms_CONS_C1_SEP_2020_women_imputed, tipo_de_programa_2==1)
attr(pr0, "trans") <- mat_3_states
attr(pr1, "trans") <- mat_3_states
#Since there are tied event times, we need to specify ties="breslow" in order to obtain the Aalen-Johansen estimator of
#the transition probability
#If we only want the cumulative hazard for the relapse → death transition, we can
#select the rows that refer to transition 3.
#The stacked format allows to calculate all cumulative hazards via the basic coxph function.
c0 <- coxph(Surv(time, status) ~ strata(trans), #Surv(time, status) ~ strata(trans)
data = pr0, method = "breslow")
c1 <- coxph(Surv(time, status) ~ strata(trans),
data = pr1, method = "breslow")
#The value of time is equal to Tstop−Tstart; it is of use in ’clock reset’-models, where the time t refers to the time spent in the current state
#The alternative is to first apply the msfit function, which we also need when computing the transition
#probabilities
msf0 <- msfit(object = c0, trans = mat_3_states)
msf1 <- msfit(object = c1, trans = mat_3_states)
#Haz contains the estimated cumulative hazard for each of the transitions for the particular patient
#specified in newd, while varHaz contains the estimated variances of these cumulative hazards,
#as well as the covariances for each combination of two transitions. All are evaluated at the
#time points for which any event in any transition occurs, possibly augmented with the largest
#(non-event) time point in the data. The summary method for msfit objects is most conveniently
#used for a summary. If we also would like to have a look at the covariances, we could set the
#argument variance equal to TRUE.
#This is a list with elements Haz (with the estimated cumulative hazard values at all event
#times), varHaz (with the covariances of each pair of estimated cumulative hazards at each
#event time point, i.e., cov( c Abgh(t), Abkl(t))), and trans, in which the transition matrix is stored
#for further use. The (co)variances of the estimated cumulative hazards may be computed in
#two different ways: by means of the Aalen estimator or by means of the Greenwood estimator.
#An advantage of the Greenwood estimator is the fact that it yields exact multinomial standard
#errors for the transition probabilities when there is no censoring. The two estimators give
#almost equal results in all practical applications.
par(mfrow = c(1, 2))
plot(msf0, cols = rep(1, 3), lwd = 2, lty = 1:3, xlab = "Years since in the state",xlim=c(0,12),ylim=c(0,.6),
ylab = "Baseline hazards for each transition", legend.pos = c(4, .15), main= "General Population")
plot(msf1, cols = rep(1, 3), lwd = 2, lty = 1:3, xlab = "Years after baseline", xlim=c(0,12),ylim=c(0,.6),
ylab = "", legend.pos = c(4, .15), main= "Women Especific")
Looking at the instantaneous transition rates, we can observe that women in Women-specific programs had a very similar transition rate from admission to readmission with the rate of those who did not experienced it to readmission. In case of the General-population programs, after 2 years the cumulative hazards of readmission without experiencing therapeutic discharge starts growing much more than the rates of those that experienced a therapeutic discharge. In both programs, the rate of therapeutic discharge stops increasing due to restrictions to the database (treatments longer than 1095 days were not considered as valid). Must take note that we are not seeing the effect of confounders in these progressions.
We also calculated the cumulative hazards but considering a semi-parametric model and the parametric model selected. For the semi-parametric, its different to the one presented in the previous figure because is not stratified by program and also include other covariates.
#fig.height=8, fig.width=12, fig.cap="Figure 11a. Estimate of Cumulative Hazards (Semi-Markov), Stratified by Program", fig.align="center"
# Semi-parametric models
#Since there are tied event times, we need to specify ties="breslow" in order to obtain the Aalen-Johansen estimator of
#the transition probability
formula3<-
as.formula(paste0("Surv(time, status==1) ~", paste0(fitform2," + strata(trans)")[[3]]))
paste0("We introduced the following Formula: ")
## [1] "We introduced the following Formula: "
formula3
## Surv(time, status == 1) ~ edad_al_ing_grupos + escolaridad_rec +
## sus_principal_mod + freq_cons_sus_prin + compromiso_biopsicosocial +
## tenencia_de_la_vivienda_mod + num_otras_sus_mod + numero_de_hijos_mod_rec +
## factor(tipo_de_programa_2) + tipo_de_plan_res + strata(trans)
cox_fits <- survival::coxph(formula3,
data = ms2_CONS_C1_SEP_2020_women_imputed, method = "breslow")
# Parametric models
sel_param_fits <- vector(length = n_trans, mode = "list")
sel_param_fits[[1]]<- fits_c_gomp[[1]]
sel_param_fits[[2]]<- fits_c_ggam[[2]]
sel_param_fits[[3]]<- fits_c_ggam[[3]]
# Database to contrast adjustments
newdat3a <- data.table::data.table(tipo_de_programa_2= factor(c(rep(1,1*n_trans))),
#comp_status= factor(rep(c("Therapeutic discharge","Discharge without clinical advice"),2)),
edad_al_ing_grupos= factor(rep("50+",1*n_trans)),
escolaridad_rec= factor(rep("1-More than high school",1*n_trans)),
sus_principal_mod= factor(rep("Marijuana",1*n_trans)),
freq_cons_sus_prin= factor(rep("2 to 3 days a week",1*n_trans)),
compromiso_biopsicosocial= factor(rep("1-Mild",1*n_trans)),
tenencia_de_la_vivienda_mod= factor(rep("Owner/Transferred dwellings/Pays Dividends",1*n_trans)),
num_otras_sus_mod= factor(rep("No additional substance",1*n_trans)),
numero_de_hijos_mod_rec= factor(rep("No",1*n_trans)),
tipo_de_plan_res= factor(rep("Outpatient",1*n_trans)),
strata= rep(1:n_trans,1)
)
newdat3b <- data.table::data.table(tipo_de_programa_2= factor(c(rep(0,1*n_trans))),
#comp_status= factor(rep(c("Therapeutic discharge","Discharge without clinical advice"),2)),
edad_al_ing_grupos= factor(rep("50+",1*n_trans)),
escolaridad_rec= factor(rep("1-More than high school",1*n_trans)),
sus_principal_mod= factor(rep("Marijuana",1*n_trans)),
freq_cons_sus_prin= factor(rep("2 to 3 days a week",1*n_trans)),
compromiso_biopsicosocial= factor(rep("1-Mild",1*n_trans)),
tenencia_de_la_vivienda_mod= factor(rep("Owner/Transferred dwellings/Pays Dividends",1*n_trans)),
num_otras_sus_mod= factor(rep("No additional substance",1*n_trans)),
numero_de_hijos_mod_rec= factor(rep("No",1*n_trans)),
tipo_de_plan_res= factor(rep("Outpatient",1*n_trans)),
strata= rep(1:n_trans,1)
)
# Non-parametric
cox_cumhaz_a <- mstate::msfit(cox_fits,
newdata = newdat3a,
trans = mat_3_states, variance = FALSE)
cox_cumhaz_b <- mstate::msfit(cox_fits,
newdata = newdat3b,
trans = mat_3_states, variance = FALSE)
max_time_a <- max(cox_cumhaz_a$Haz$time) # Maximum follow-up time
max_time_b <- max(cox_cumhaz_b$Haz$time) # Maximum follow-up time
# Parametric
flexsurv_cumhaz_a <- flexsurv::msfit.flexsurvreg(sel_param_fits,
newdata = newdat3a,
t = seq(.001, max_time_a, by = .01),
B = n_iter,
trans = mat_3_states, variance = FALSE)
flexsurv_cumhaz_b <- flexsurv::msfit.flexsurvreg(sel_param_fits,
newdata = newdat3b,
t = seq(.001, max_time_b, by = .01),
B = n_iter,
trans = mat_3_states, variance = FALSE)
# Plot to compare
cumhaz_data_a <- rbind(data.frame(cox_cumhaz_a$Haz,
model = "Cox"),
data.frame(flexsurv_cumhaz_a$Haz,
model = "Parametric"))
cumhaz_data_a$trans <- factor(cumhaz_data_a$trans,
levels = seq(1, 3),
labels = c("Adm -> Ther.Disch",
"Adm -> Readm",
"Ther.Disch -> Readm"))
cumhaz_data_b <- rbind(data.frame(cox_cumhaz_b$Haz,
model = "Cox"),
data.frame(flexsurv_cumhaz_b$Haz,
model = "Parametric"))
cumhaz_data_b$trans <- factor(cumhaz_data_b$trans,
levels = seq(1, 3),
labels = c("Adm -> Ther.Disch",
"Adm -> Readm",
"Ther.Disch -> Readm"))
#Son distintos, por mas que parezcan iguales.
#plot(cumhaz_data_a$time[which(cumhaz_data_a$trans=="Adm -> Ther.Disch")],round(cumhaz_data_a$Haz[which(cumhaz_data_a$trans=="Adm -> Ther.Disch")]-cumhaz_data_b$Haz[which(cumhaz_data_b$trans=="Adm -> Ther.Disch")],3))
#Cumulative incidence
cuminc_mstate_3_states<-
rbind(
cbind(Cuminc(time="time", status="status", group= "tipo_de_programa_2", data=subset(ms2_CONS_C1_SEP_2020_women_imputed, trans==1)),trans=1),
cbind(Cuminc(time="time", status="status", group= "tipo_de_programa_2", data=subset(ms2_CONS_C1_SEP_2020_women_imputed, trans==1)),trans=2),
cbind(Cuminc(time="time", status="status", group= "tipo_de_programa_2", data=subset(ms2_CONS_C1_SEP_2020_women_imputed, trans==1)),trans=3)
)
plot_comp_mssurvfit<-
rbind(cbind.data.frame(cumhaz_data_a,tipo_de_programa_2=1),
cbind.data.frame(cumhaz_data_b,tipo_de_programa_2=0)) %>%
ggplot2::ggplot(aes(x = time, y = Haz, col = model, linetype = factor(tipo_de_programa_2))) +
geom_line(size=1) +
facet_wrap(~trans) +
scale_color_manual(name = "Model", values=c("#1437AD","#BF9330"), labels= c("Cox","Parametric")) +
scale_linetype_manual(name= "Type of program",values=c(1,4), labels=c("WE","GP")) +
xlab("Years") + ylab("Cumulative hazard") +
theme_minimal()+
theme(legend.position=c(.9,.8),
legend.background = element_rect(fill="white",
size=0.5, linetype="solid",
colour ="black"))
plot_comp_mssurvfit
if(no_mostrar==1){
jpeg("C:/Users/andre/Desktop/SUD_CL/eso8.jpg", height=10, width= 10, res= 96, units = "in")
plot_comp_mssurvfit
dev.off()
}
Transition Probabilites
#. Estimation of transition probabilities The cumulative hazard is one part of the story; it reflects the instantaneous
#transition rate. But we also want to know the probability to be in each of the three states over time.
#transition probabilities
#By default, probtrans uses forward prediction,
#which means that s is kept fixed and t > s. The argument predt specifies either s or t. In this
#case (forward prediction) it specifies s.
#tem [[i]] contains predictions from state i. Each item of the list is a data frame with time containing all event
#time points, and pstate1, pstate2, etc the probabilities of being in state 1, 2, etc, and finally
#se1, se2 etc the standard errors of these estimated probabilities. The item [[3]] contains predictions Pˆ
#3h(0, t) (we chose s = 0) starting from the RelDeath state, which is absorbing.
#We use the probtrans function to compute the estimates.
# The function probtrans() calculates the estimated transition probabilities, and optionally
#the standard errors and/or the covariances of the transition probabilities. In the case of
#non-parametric models, the user can choose between Greenwood or Aalen standard errors
#in the method argument, in accordance with the choice in msfit(). Just as in the case of
#the estimates of the hazards, the estimates of the transition probabilities themselves do not
#depend on this choice.
# The argument predt gives the starting time for prediction, that is, the starting time for
#the calculation of the transition probabilities. Two directions of prediction have been implemented, which can be specified by the #direction argument: "forward" (the default) and "fixedhorizon".
#Any string starting with "fo" or "fi" is sufficient to distinguish between
#the two options. The "forward" option means that the prediction is made from predt; in
#P(s, t), time s remains fixed at the value predt, while time t varies from s to the last (possibly
#censored) time point in the data. The "fixedhorizon" option means that the prediction is
#made for predt; in P(s, t), time t remains fixed at the value predt and time s varies from 0
#to predt. The use of the fixed horizon option will be illustrated in Section 4.1
invisible(c("TOdavía no sé dónde poner el predt. Me pasó algo muy raro cuando lo puse en 0. er cómo interpretarlo"))
pt0 <- probtrans(msf0, predt=0)[[2]]
pt1 <- probtrans(msf1, predt=0)[[2]]
pt0_ci95 <- probtrans(msf0, predt=0)
pt1_ci95 <- probtrans(msf1, predt=0)
#Error in IplusdA[from, from] <- IplusdA[from, from] - Haztt$dhaz[j] :
#NAs no son permitidos en asignaciones subscritas
# Era porque no existía un estado 1000 (predt=1), el que estaba haciendo alusión al estado y no al tiempo.
#g. If the transition probabilities from
#a particular starting state are required, the argument from must be added. These results
#show how the prognosis of a patient depends on his/her starting state and on the moment
#that is taken as the starting point for prediction.
#cumulative hazards. Their incremnenst will be used in the Aalen-Johansen estimator
par(mfrow=c(1,2))
plot(pt0$time, pt0$pstate2, type="s", lwd=2, ylim= c(0,1),
xlab="Time since randomisation (years)", ylab="Probability")
lines(pt1$time, pt1$pstate2, type="s", lwd=2, lty=3)
legend("right", c("GP", "WE"), lwd=2, lty=1:2, bty="n")
title(main="Aalen-Johansen \nP(Therapeutic Discharge|Admission)")
plot(pt0$time, pt0$pstate3, type="s", lwd=2, ylim= c(0,1),
xlab="Time since randomisation (years)", ylab="Probability")
lines(pt1$time, pt1$pstate3, type="s", lwd=2, lty=3)
legend("right", c("GP", "WE"), lwd=2, lty=1:2, bty="n")
title(main="Aalen-Johansen \nP(Readmission|Admission)")
#LMpt0 <- LMAJ(msdata=pr0, s=1000, from=2)
#LMpt1 <- LMAJ(msdata=pr1, s=1000, from=2)
This figure is not quite interpretable since models do not include the effect of covariates.
if (no_mostrar==1){
par(mfrow=c(2,2))
plot(simdatprob[[1]]$time,simdatprob[[1]]$pstate1,type="l",ylim=c(0,1),
xlab="time",ylab="transition prob")
lines(simdatprob[[1]]$time,simdatprob[[1]]$pstate1+1.96*simdatprob[[1]])
lines(simdatprob[[1]]$time,simdatprob[[1]]$pstate1-1.96*simdatprob[[1]])
title("P(staying healthy), no transition")
plot(simdatprob[[1]]$time,simdatprob[[1]]$pstate2,type="l",ylim=c(0,1),xlab="time",ylab="transition prob") lines(simdatprob[[1]]$time,simdatprob[[1]]$pstate2+1.96*simdatprob[[1]])
lines(simdatprob[[1]]$time,simdatprob[[1]]$pstate2-1.96*simdatprob[[1]])
title("P(having disease), transition 0-1, not 1-2")
plot(simdatprob[[1]]$time,simdatprob[[1]]$pstate3,type="l",ylim=c(0,1),xlab="time",ylab="transition prob") lines(simdatprob[[1]]$time,simdatprob[[1]]$pstate3+1.96*simdatprob[[1]])
lines(simdatprob[[1]]$time,simdatprob[[1]]$pstate3-1.96*simdatprob[[1]])
title("P(Death), transitions 0-2 or 0-1-2")
plot(simdatprob[[2]]$time,simdatprob[[2]]$pstate3,type="l",ylim=c(0,1),xlab="time",ylab="transition prob") lines(simdatprob[[2]]$time,simdatprob[[2]]$pstate3+1.96*simdatprob[[2]])
lines(simdatprob[[2]]$time,simdatprob[[2]]$pstate3-1.96*simdatprob[[2]])
title("P(Death|Sick) - transition 1-2")
}
plot(pt0_ci95[[1]]$time, pt0_ci95[[1]]$pstate1, type="l",ylim=c(0,1), xlab="Years", ylab="Transition Probabilities")
lines(pt0_ci95[[1]]$time,pt0_ci95[[1]]$pstate1+1.96*pt0_ci95[[1]]$pstate1)
lines(pt0_ci95[[1]]$time,pt0_ci95[[1]]$pstate1-1.96*pt0_ci95[[1]]$pstate1)
title("P(staying healthy), no transition")
Calculations of State Occupancy Probabilities